avsolatorio commited on
Commit
6cf61c8
·
verified ·
1 Parent(s): 6f5aa69

Training in progress, step 1000

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. pytorch_model.bin +1 -1
  2. snapshot/best/1_Pooling/config.json +7 -0
  3. snapshot/best/2_MixtureEmbeddingsModel/MixSentenceTransformer_blender_bert_config.json +82 -0
  4. snapshot/best/2_MixtureEmbeddingsModel/MixSentenceTransformer_config.json +24 -0
  5. snapshot/best/2_MixtureEmbeddingsModel/adapters.bin +3 -0
  6. snapshot/best/2_MixtureEmbeddingsModel/blender.bin +3 -0
  7. snapshot/best/2_MixtureEmbeddingsModel/blender_position_embeddings.bin +3 -0
  8. snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/1_Pooling/config.json +7 -0
  9. snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/README.md +2689 -0
  10. snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/config.json +31 -0
  11. snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/config_sentence_transformers.json +7 -0
  12. snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/model.safetensors +3 -0
  13. snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/modules.json +14 -0
  14. snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/sentence_bert_config.json +4 -0
  15. snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/special_tokens_map.json +44 -0
  16. snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/tokenizer.json +0 -0
  17. snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/tokenizer_config.json +71 -0
  18. snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/vocab.txt +0 -0
  19. snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/1_Pooling/config.json +7 -0
  20. snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/README.md +3012 -0
  21. snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/config.json +32 -0
  22. snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/config_sentence_transformers.json +7 -0
  23. snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/model.safetensors +3 -0
  24. snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/modules.json +20 -0
  25. snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/sentence_bert_config.json +4 -0
  26. snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/special_tokens_map.json +37 -0
  27. snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/tokenizer.json +0 -0
  28. snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/tokenizer_config.json +57 -0
  29. snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/vocab.txt +0 -0
  30. snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/1_Pooling/config.json +7 -0
  31. snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/README.md +2702 -0
  32. snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/config.json +25 -0
  33. snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/config_sentence_transformers.json +7 -0
  34. snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/model.safetensors +3 -0
  35. snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/modules.json +20 -0
  36. snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/sentence_bert_config.json +4 -0
  37. snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/special_tokens_map.json +37 -0
  38. snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/tokenizer.json +0 -0
  39. snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/tokenizer_config.json +64 -0
  40. snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/vocab.txt +0 -0
  41. snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/1_Pooling/config.json +7 -0
  42. snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/README.md +176 -0
  43. snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/config.json +26 -0
  44. snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/config_sentence_transformers.json +7 -0
  45. snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/model.safetensors +3 -0
  46. snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/modules.json +20 -0
  47. snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/sentence_bert_config.json +4 -0
  48. snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/special_tokens_map.json +37 -0
  49. snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/tokenizer.json +0 -0
  50. snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/tokenizer_config.json +64 -0
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b73b90292a6faa31e73fee3251b449d010e9630f4f920558f0dc206ed994eaa
3
  size 636548706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cace4fe39245b53765e26bfe0d36926846da3bec48258f628ce061393ef76403
3
  size 636548706
snapshot/best/1_Pooling/config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false
7
+ }
snapshot/best/2_MixtureEmbeddingsModel/MixSentenceTransformer_blender_bert_config.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "return_dict": true,
3
+ "output_hidden_states": false,
4
+ "output_attentions": false,
5
+ "torchscript": false,
6
+ "torch_dtype": null,
7
+ "use_bfloat16": false,
8
+ "tf_legacy_loss": false,
9
+ "pruned_heads": {},
10
+ "tie_word_embeddings": true,
11
+ "is_encoder_decoder": false,
12
+ "is_decoder": false,
13
+ "cross_attention_hidden_size": null,
14
+ "add_cross_attention": false,
15
+ "tie_encoder_decoder": false,
16
+ "max_length": 20,
17
+ "min_length": 0,
18
+ "do_sample": false,
19
+ "early_stopping": false,
20
+ "num_beams": 1,
21
+ "num_beam_groups": 1,
22
+ "diversity_penalty": 0.0,
23
+ "temperature": 1.0,
24
+ "top_k": 50,
25
+ "top_p": 1.0,
26
+ "typical_p": 1.0,
27
+ "repetition_penalty": 1.0,
28
+ "length_penalty": 1.0,
29
+ "no_repeat_ngram_size": 0,
30
+ "encoder_no_repeat_ngram_size": 0,
31
+ "bad_words_ids": null,
32
+ "num_return_sequences": 1,
33
+ "chunk_size_feed_forward": 0,
34
+ "output_scores": false,
35
+ "return_dict_in_generate": false,
36
+ "forced_bos_token_id": null,
37
+ "forced_eos_token_id": null,
38
+ "remove_invalid_values": false,
39
+ "exponential_decay_length_penalty": null,
40
+ "suppress_tokens": null,
41
+ "begin_suppress_tokens": null,
42
+ "architectures": [
43
+ "BertForMaskedLM"
44
+ ],
45
+ "finetuning_task": null,
46
+ "id2label": {
47
+ "0": "LABEL_0",
48
+ "1": "LABEL_1"
49
+ },
50
+ "label2id": {
51
+ "LABEL_0": 0,
52
+ "LABEL_1": 1
53
+ },
54
+ "tokenizer_class": null,
55
+ "prefix": null,
56
+ "bos_token_id": null,
57
+ "pad_token_id": 0,
58
+ "eos_token_id": null,
59
+ "sep_token_id": null,
60
+ "decoder_start_token_id": null,
61
+ "task_specific_params": null,
62
+ "problem_type": null,
63
+ "_name_or_path": "bert-base-uncased",
64
+ "transformers_version": "4.36.2",
65
+ "gradient_checkpointing": false,
66
+ "model_type": "bert",
67
+ "vocab_size": 30522,
68
+ "hidden_size": 768,
69
+ "num_hidden_layers": 3,
70
+ "num_attention_heads": 6,
71
+ "hidden_act": "gelu",
72
+ "intermediate_size": 768,
73
+ "hidden_dropout_prob": 0.1,
74
+ "attention_probs_dropout_prob": 0.1,
75
+ "max_position_embeddings": 512,
76
+ "type_vocab_size": 2,
77
+ "initializer_range": 0.02,
78
+ "layer_norm_eps": 1e-12,
79
+ "position_embedding_type": "absolute",
80
+ "use_cache": true,
81
+ "classifier_dropout": null
82
+ }
snapshot/best/2_MixtureEmbeddingsModel/MixSentenceTransformer_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "expert_model_names": [
3
+ "TaylorAI/bge-micro-v2",
4
+ "khoa-klaytn/bge-small-en-v1.5-angle",
5
+ "thenlper/gte-small",
6
+ "sentence-transformers/all-MiniLM-L6-v2",
7
+ "TaylorAI/gte-tiny"
8
+ ],
9
+ "encoder_dim": 384,
10
+ "topk": 5,
11
+ "freeze_experts": false,
12
+ "normalize_experts": false,
13
+ "has_blender": true,
14
+ "has_noise": false,
15
+ "use_encoder_expert": false,
16
+ "use_gate_norm_last": false,
17
+ "has_layernorm": false,
18
+ "output_dim": 768,
19
+ "blender_mode": "bert",
20
+ "use_gate_random": false,
21
+ "gate_temp_max_steps": 50000,
22
+ "gate_max_temp": 3.0,
23
+ "gate_bias": false
24
+ }
snapshot/best/2_MixtureEmbeddingsModel/adapters.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5328d7c9de6829152de4499a3a4a55f61b34d4615f5bb05a4f4c80559369c635
3
+ size 5917382
snapshot/best/2_MixtureEmbeddingsModel/blender.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:620872d877d77e64d389c15d6dc889f3ff3f866b87306c962755189e2de3a574
3
+ size 42577286
snapshot/best/2_MixtureEmbeddingsModel/blender_position_embeddings.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d78788832c39bbaf7e58af2b39d4761291b152324d7df3a987244183b30f949a
3
+ size 16832
snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/1_Pooling/config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false
7
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/README.md ADDED
@@ -0,0 +1,2689 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ pipeline_tag: sentence-similarity
3
+ tags:
4
+ - sentence-transformers
5
+ - feature-extraction
6
+ - sentence-similarity
7
+ - transformers
8
+ - mteb
9
+ model-index:
10
+ - name: bge_micro
11
+ results:
12
+ - task:
13
+ type: Classification
14
+ dataset:
15
+ type: mteb/amazon_counterfactual
16
+ name: MTEB AmazonCounterfactualClassification (en)
17
+ config: en
18
+ split: test
19
+ revision: e8379541af4e31359cca9fbcf4b00f2671dba205
20
+ metrics:
21
+ - type: accuracy
22
+ value: 67.76119402985074
23
+ - type: ap
24
+ value: 29.637849284211114
25
+ - type: f1
26
+ value: 61.31181187111905
27
+ - task:
28
+ type: Classification
29
+ dataset:
30
+ type: mteb/amazon_polarity
31
+ name: MTEB AmazonPolarityClassification
32
+ config: default
33
+ split: test
34
+ revision: e2d317d38cd51312af73b3d32a06d1a08b442046
35
+ metrics:
36
+ - type: accuracy
37
+ value: 79.7547
38
+ - type: ap
39
+ value: 74.21401629809145
40
+ - type: f1
41
+ value: 79.65319615433783
42
+ - task:
43
+ type: Classification
44
+ dataset:
45
+ type: mteb/amazon_reviews_multi
46
+ name: MTEB AmazonReviewsClassification (en)
47
+ config: en
48
+ split: test
49
+ revision: 1399c76144fd37290681b995c656ef9b2e06e26d
50
+ metrics:
51
+ - type: accuracy
52
+ value: 37.452000000000005
53
+ - type: f1
54
+ value: 37.0245198854966
55
+ - task:
56
+ type: Retrieval
57
+ dataset:
58
+ type: arguana
59
+ name: MTEB ArguAna
60
+ config: default
61
+ split: test
62
+ revision: None
63
+ metrics:
64
+ - type: map_at_1
65
+ value: 31.152
66
+ - type: map_at_10
67
+ value: 46.702
68
+ - type: map_at_100
69
+ value: 47.563
70
+ - type: map_at_1000
71
+ value: 47.567
72
+ - type: map_at_3
73
+ value: 42.058
74
+ - type: map_at_5
75
+ value: 44.608
76
+ - type: mrr_at_1
77
+ value: 32.006
78
+ - type: mrr_at_10
79
+ value: 47.064
80
+ - type: mrr_at_100
81
+ value: 47.910000000000004
82
+ - type: mrr_at_1000
83
+ value: 47.915
84
+ - type: mrr_at_3
85
+ value: 42.283
86
+ - type: mrr_at_5
87
+ value: 44.968
88
+ - type: ndcg_at_1
89
+ value: 31.152
90
+ - type: ndcg_at_10
91
+ value: 55.308
92
+ - type: ndcg_at_100
93
+ value: 58.965
94
+ - type: ndcg_at_1000
95
+ value: 59.067
96
+ - type: ndcg_at_3
97
+ value: 45.698
98
+ - type: ndcg_at_5
99
+ value: 50.296
100
+ - type: precision_at_1
101
+ value: 31.152
102
+ - type: precision_at_10
103
+ value: 8.279
104
+ - type: precision_at_100
105
+ value: 0.987
106
+ - type: precision_at_1000
107
+ value: 0.1
108
+ - type: precision_at_3
109
+ value: 18.753
110
+ - type: precision_at_5
111
+ value: 13.485
112
+ - type: recall_at_1
113
+ value: 31.152
114
+ - type: recall_at_10
115
+ value: 82.788
116
+ - type: recall_at_100
117
+ value: 98.72
118
+ - type: recall_at_1000
119
+ value: 99.502
120
+ - type: recall_at_3
121
+ value: 56.259
122
+ - type: recall_at_5
123
+ value: 67.425
124
+ - task:
125
+ type: Clustering
126
+ dataset:
127
+ type: mteb/arxiv-clustering-p2p
128
+ name: MTEB ArxivClusteringP2P
129
+ config: default
130
+ split: test
131
+ revision: a122ad7f3f0291bf49cc6f4d32aa80929df69d5d
132
+ metrics:
133
+ - type: v_measure
134
+ value: 44.52692241938116
135
+ - task:
136
+ type: Clustering
137
+ dataset:
138
+ type: mteb/arxiv-clustering-s2s
139
+ name: MTEB ArxivClusteringS2S
140
+ config: default
141
+ split: test
142
+ revision: f910caf1a6075f7329cdf8c1a6135696f37dbd53
143
+ metrics:
144
+ - type: v_measure
145
+ value: 33.245710292773595
146
+ - task:
147
+ type: Reranking
148
+ dataset:
149
+ type: mteb/askubuntudupquestions-reranking
150
+ name: MTEB AskUbuntuDupQuestions
151
+ config: default
152
+ split: test
153
+ revision: 2000358ca161889fa9c082cb41daa8dcfb161a54
154
+ metrics:
155
+ - type: map
156
+ value: 58.08493637155168
157
+ - type: mrr
158
+ value: 71.94378490084861
159
+ - task:
160
+ type: STS
161
+ dataset:
162
+ type: mteb/biosses-sts
163
+ name: MTEB BIOSSES
164
+ config: default
165
+ split: test
166
+ revision: d3fb88f8f02e40887cd149695127462bbcf29b4a
167
+ metrics:
168
+ - type: cos_sim_pearson
169
+ value: 84.1602804378326
170
+ - type: cos_sim_spearman
171
+ value: 82.92478106365587
172
+ - type: euclidean_pearson
173
+ value: 82.27930167277077
174
+ - type: euclidean_spearman
175
+ value: 82.18560759458093
176
+ - type: manhattan_pearson
177
+ value: 82.34277425888187
178
+ - type: manhattan_spearman
179
+ value: 81.72776583704467
180
+ - task:
181
+ type: Classification
182
+ dataset:
183
+ type: mteb/banking77
184
+ name: MTEB Banking77Classification
185
+ config: default
186
+ split: test
187
+ revision: 0fd18e25b25c072e09e0d92ab615fda904d66300
188
+ metrics:
189
+ - type: accuracy
190
+ value: 81.17207792207792
191
+ - type: f1
192
+ value: 81.09893836310513
193
+ - task:
194
+ type: Clustering
195
+ dataset:
196
+ type: mteb/biorxiv-clustering-p2p
197
+ name: MTEB BiorxivClusteringP2P
198
+ config: default
199
+ split: test
200
+ revision: 65b79d1d13f80053f67aca9498d9402c2d9f1f40
201
+ metrics:
202
+ - type: v_measure
203
+ value: 36.109308463095516
204
+ - task:
205
+ type: Clustering
206
+ dataset:
207
+ type: mteb/biorxiv-clustering-s2s
208
+ name: MTEB BiorxivClusteringS2S
209
+ config: default
210
+ split: test
211
+ revision: 258694dd0231531bc1fd9de6ceb52a0853c6d908
212
+ metrics:
213
+ - type: v_measure
214
+ value: 28.06048212317168
215
+ - task:
216
+ type: Retrieval
217
+ dataset:
218
+ type: BeIR/cqadupstack
219
+ name: MTEB CQADupstackAndroidRetrieval
220
+ config: default
221
+ split: test
222
+ revision: None
223
+ metrics:
224
+ - type: map_at_1
225
+ value: 28.233999999999998
226
+ - type: map_at_10
227
+ value: 38.092999999999996
228
+ - type: map_at_100
229
+ value: 39.473
230
+ - type: map_at_1000
231
+ value: 39.614
232
+ - type: map_at_3
233
+ value: 34.839
234
+ - type: map_at_5
235
+ value: 36.523
236
+ - type: mrr_at_1
237
+ value: 35.193000000000005
238
+ - type: mrr_at_10
239
+ value: 44.089
240
+ - type: mrr_at_100
241
+ value: 44.927
242
+ - type: mrr_at_1000
243
+ value: 44.988
244
+ - type: mrr_at_3
245
+ value: 41.559000000000005
246
+ - type: mrr_at_5
247
+ value: 43.162
248
+ - type: ndcg_at_1
249
+ value: 35.193000000000005
250
+ - type: ndcg_at_10
251
+ value: 44.04
252
+ - type: ndcg_at_100
253
+ value: 49.262
254
+ - type: ndcg_at_1000
255
+ value: 51.847
256
+ - type: ndcg_at_3
257
+ value: 39.248
258
+ - type: ndcg_at_5
259
+ value: 41.298
260
+ - type: precision_at_1
261
+ value: 35.193000000000005
262
+ - type: precision_at_10
263
+ value: 8.555
264
+ - type: precision_at_100
265
+ value: 1.3820000000000001
266
+ - type: precision_at_1000
267
+ value: 0.189
268
+ - type: precision_at_3
269
+ value: 19.123
270
+ - type: precision_at_5
271
+ value: 13.648
272
+ - type: recall_at_1
273
+ value: 28.233999999999998
274
+ - type: recall_at_10
275
+ value: 55.094
276
+ - type: recall_at_100
277
+ value: 76.85300000000001
278
+ - type: recall_at_1000
279
+ value: 94.163
280
+ - type: recall_at_3
281
+ value: 40.782000000000004
282
+ - type: recall_at_5
283
+ value: 46.796
284
+ - task:
285
+ type: Retrieval
286
+ dataset:
287
+ type: BeIR/cqadupstack
288
+ name: MTEB CQADupstackEnglishRetrieval
289
+ config: default
290
+ split: test
291
+ revision: None
292
+ metrics:
293
+ - type: map_at_1
294
+ value: 21.538
295
+ - type: map_at_10
296
+ value: 28.449
297
+ - type: map_at_100
298
+ value: 29.471000000000004
299
+ - type: map_at_1000
300
+ value: 29.599999999999998
301
+ - type: map_at_3
302
+ value: 26.371
303
+ - type: map_at_5
304
+ value: 27.58
305
+ - type: mrr_at_1
306
+ value: 26.815
307
+ - type: mrr_at_10
308
+ value: 33.331
309
+ - type: mrr_at_100
310
+ value: 34.114
311
+ - type: mrr_at_1000
312
+ value: 34.182
313
+ - type: mrr_at_3
314
+ value: 31.561
315
+ - type: mrr_at_5
316
+ value: 32.608
317
+ - type: ndcg_at_1
318
+ value: 26.815
319
+ - type: ndcg_at_10
320
+ value: 32.67
321
+ - type: ndcg_at_100
322
+ value: 37.039
323
+ - type: ndcg_at_1000
324
+ value: 39.769
325
+ - type: ndcg_at_3
326
+ value: 29.523
327
+ - type: ndcg_at_5
328
+ value: 31.048
329
+ - type: precision_at_1
330
+ value: 26.815
331
+ - type: precision_at_10
332
+ value: 5.955
333
+ - type: precision_at_100
334
+ value: 1.02
335
+ - type: precision_at_1000
336
+ value: 0.152
337
+ - type: precision_at_3
338
+ value: 14.033999999999999
339
+ - type: precision_at_5
340
+ value: 9.911
341
+ - type: recall_at_1
342
+ value: 21.538
343
+ - type: recall_at_10
344
+ value: 40.186
345
+ - type: recall_at_100
346
+ value: 58.948
347
+ - type: recall_at_1000
348
+ value: 77.158
349
+ - type: recall_at_3
350
+ value: 30.951
351
+ - type: recall_at_5
352
+ value: 35.276
353
+ - task:
354
+ type: Retrieval
355
+ dataset:
356
+ type: BeIR/cqadupstack
357
+ name: MTEB CQADupstackGamingRetrieval
358
+ config: default
359
+ split: test
360
+ revision: None
361
+ metrics:
362
+ - type: map_at_1
363
+ value: 35.211999999999996
364
+ - type: map_at_10
365
+ value: 46.562
366
+ - type: map_at_100
367
+ value: 47.579
368
+ - type: map_at_1000
369
+ value: 47.646
370
+ - type: map_at_3
371
+ value: 43.485
372
+ - type: map_at_5
373
+ value: 45.206
374
+ - type: mrr_at_1
375
+ value: 40.627
376
+ - type: mrr_at_10
377
+ value: 49.928
378
+ - type: mrr_at_100
379
+ value: 50.647
380
+ - type: mrr_at_1000
381
+ value: 50.685
382
+ - type: mrr_at_3
383
+ value: 47.513
384
+ - type: mrr_at_5
385
+ value: 48.958
386
+ - type: ndcg_at_1
387
+ value: 40.627
388
+ - type: ndcg_at_10
389
+ value: 52.217
390
+ - type: ndcg_at_100
391
+ value: 56.423
392
+ - type: ndcg_at_1000
393
+ value: 57.821999999999996
394
+ - type: ndcg_at_3
395
+ value: 46.949000000000005
396
+ - type: ndcg_at_5
397
+ value: 49.534
398
+ - type: precision_at_1
399
+ value: 40.627
400
+ - type: precision_at_10
401
+ value: 8.476
402
+ - type: precision_at_100
403
+ value: 1.15
404
+ - type: precision_at_1000
405
+ value: 0.132
406
+ - type: precision_at_3
407
+ value: 21.003
408
+ - type: precision_at_5
409
+ value: 14.469999999999999
410
+ - type: recall_at_1
411
+ value: 35.211999999999996
412
+ - type: recall_at_10
413
+ value: 65.692
414
+ - type: recall_at_100
415
+ value: 84.011
416
+ - type: recall_at_1000
417
+ value: 94.03099999999999
418
+ - type: recall_at_3
419
+ value: 51.404
420
+ - type: recall_at_5
421
+ value: 57.882
422
+ - task:
423
+ type: Retrieval
424
+ dataset:
425
+ type: BeIR/cqadupstack
426
+ name: MTEB CQADupstackGisRetrieval
427
+ config: default
428
+ split: test
429
+ revision: None
430
+ metrics:
431
+ - type: map_at_1
432
+ value: 22.09
433
+ - type: map_at_10
434
+ value: 29.516
435
+ - type: map_at_100
436
+ value: 30.462
437
+ - type: map_at_1000
438
+ value: 30.56
439
+ - type: map_at_3
440
+ value: 26.945000000000004
441
+ - type: map_at_5
442
+ value: 28.421999999999997
443
+ - type: mrr_at_1
444
+ value: 23.616
445
+ - type: mrr_at_10
446
+ value: 31.221
447
+ - type: mrr_at_100
448
+ value: 32.057
449
+ - type: mrr_at_1000
450
+ value: 32.137
451
+ - type: mrr_at_3
452
+ value: 28.738000000000003
453
+ - type: mrr_at_5
454
+ value: 30.156
455
+ - type: ndcg_at_1
456
+ value: 23.616
457
+ - type: ndcg_at_10
458
+ value: 33.97
459
+ - type: ndcg_at_100
460
+ value: 38.806000000000004
461
+ - type: ndcg_at_1000
462
+ value: 41.393
463
+ - type: ndcg_at_3
464
+ value: 28.908
465
+ - type: ndcg_at_5
466
+ value: 31.433
467
+ - type: precision_at_1
468
+ value: 23.616
469
+ - type: precision_at_10
470
+ value: 5.299
471
+ - type: precision_at_100
472
+ value: 0.812
473
+ - type: precision_at_1000
474
+ value: 0.107
475
+ - type: precision_at_3
476
+ value: 12.015
477
+ - type: precision_at_5
478
+ value: 8.701
479
+ - type: recall_at_1
480
+ value: 22.09
481
+ - type: recall_at_10
482
+ value: 46.089999999999996
483
+ - type: recall_at_100
484
+ value: 68.729
485
+ - type: recall_at_1000
486
+ value: 88.435
487
+ - type: recall_at_3
488
+ value: 32.584999999999994
489
+ - type: recall_at_5
490
+ value: 38.550000000000004
491
+ - task:
492
+ type: Retrieval
493
+ dataset:
494
+ type: BeIR/cqadupstack
495
+ name: MTEB CQADupstackMathematicaRetrieval
496
+ config: default
497
+ split: test
498
+ revision: None
499
+ metrics:
500
+ - type: map_at_1
501
+ value: 15.469
502
+ - type: map_at_10
503
+ value: 22.436
504
+ - type: map_at_100
505
+ value: 23.465
506
+ - type: map_at_1000
507
+ value: 23.608999999999998
508
+ - type: map_at_3
509
+ value: 19.716
510
+ - type: map_at_5
511
+ value: 21.182000000000002
512
+ - type: mrr_at_1
513
+ value: 18.905
514
+ - type: mrr_at_10
515
+ value: 26.55
516
+ - type: mrr_at_100
517
+ value: 27.46
518
+ - type: mrr_at_1000
519
+ value: 27.553
520
+ - type: mrr_at_3
521
+ value: 23.921999999999997
522
+ - type: mrr_at_5
523
+ value: 25.302999999999997
524
+ - type: ndcg_at_1
525
+ value: 18.905
526
+ - type: ndcg_at_10
527
+ value: 27.437
528
+ - type: ndcg_at_100
529
+ value: 32.555
530
+ - type: ndcg_at_1000
531
+ value: 35.885
532
+ - type: ndcg_at_3
533
+ value: 22.439
534
+ - type: ndcg_at_5
535
+ value: 24.666
536
+ - type: precision_at_1
537
+ value: 18.905
538
+ - type: precision_at_10
539
+ value: 5.2490000000000006
540
+ - type: precision_at_100
541
+ value: 0.889
542
+ - type: precision_at_1000
543
+ value: 0.131
544
+ - type: precision_at_3
545
+ value: 10.862
546
+ - type: precision_at_5
547
+ value: 8.085
548
+ - type: recall_at_1
549
+ value: 15.469
550
+ - type: recall_at_10
551
+ value: 38.706
552
+ - type: recall_at_100
553
+ value: 61.242
554
+ - type: recall_at_1000
555
+ value: 84.84
556
+ - type: recall_at_3
557
+ value: 24.973
558
+ - type: recall_at_5
559
+ value: 30.603
560
+ - task:
561
+ type: Retrieval
562
+ dataset:
563
+ type: BeIR/cqadupstack
564
+ name: MTEB CQADupstackPhysicsRetrieval
565
+ config: default
566
+ split: test
567
+ revision: None
568
+ metrics:
569
+ - type: map_at_1
570
+ value: 24.918000000000003
571
+ - type: map_at_10
572
+ value: 34.296
573
+ - type: map_at_100
574
+ value: 35.632000000000005
575
+ - type: map_at_1000
576
+ value: 35.748999999999995
577
+ - type: map_at_3
578
+ value: 31.304
579
+ - type: map_at_5
580
+ value: 33.166000000000004
581
+ - type: mrr_at_1
582
+ value: 30.703000000000003
583
+ - type: mrr_at_10
584
+ value: 39.655
585
+ - type: mrr_at_100
586
+ value: 40.569
587
+ - type: mrr_at_1000
588
+ value: 40.621
589
+ - type: mrr_at_3
590
+ value: 37.023
591
+ - type: mrr_at_5
592
+ value: 38.664
593
+ - type: ndcg_at_1
594
+ value: 30.703000000000003
595
+ - type: ndcg_at_10
596
+ value: 39.897
597
+ - type: ndcg_at_100
598
+ value: 45.777
599
+ - type: ndcg_at_1000
600
+ value: 48.082
601
+ - type: ndcg_at_3
602
+ value: 35.122
603
+ - type: ndcg_at_5
604
+ value: 37.691
605
+ - type: precision_at_1
606
+ value: 30.703000000000003
607
+ - type: precision_at_10
608
+ value: 7.305000000000001
609
+ - type: precision_at_100
610
+ value: 1.208
611
+ - type: precision_at_1000
612
+ value: 0.159
613
+ - type: precision_at_3
614
+ value: 16.811
615
+ - type: precision_at_5
616
+ value: 12.203999999999999
617
+ - type: recall_at_1
618
+ value: 24.918000000000003
619
+ - type: recall_at_10
620
+ value: 51.31
621
+ - type: recall_at_100
622
+ value: 76.534
623
+ - type: recall_at_1000
624
+ value: 91.911
625
+ - type: recall_at_3
626
+ value: 37.855
627
+ - type: recall_at_5
628
+ value: 44.493
629
+ - task:
630
+ type: Retrieval
631
+ dataset:
632
+ type: BeIR/cqadupstack
633
+ name: MTEB CQADupstackProgrammersRetrieval
634
+ config: default
635
+ split: test
636
+ revision: None
637
+ metrics:
638
+ - type: map_at_1
639
+ value: 22.416
640
+ - type: map_at_10
641
+ value: 30.474
642
+ - type: map_at_100
643
+ value: 31.759999999999998
644
+ - type: map_at_1000
645
+ value: 31.891000000000002
646
+ - type: map_at_3
647
+ value: 27.728
648
+ - type: map_at_5
649
+ value: 29.247
650
+ - type: mrr_at_1
651
+ value: 28.881
652
+ - type: mrr_at_10
653
+ value: 36.418
654
+ - type: mrr_at_100
655
+ value: 37.347
656
+ - type: mrr_at_1000
657
+ value: 37.415
658
+ - type: mrr_at_3
659
+ value: 33.942
660
+ - type: mrr_at_5
661
+ value: 35.386
662
+ - type: ndcg_at_1
663
+ value: 28.881
664
+ - type: ndcg_at_10
665
+ value: 35.812
666
+ - type: ndcg_at_100
667
+ value: 41.574
668
+ - type: ndcg_at_1000
669
+ value: 44.289
670
+ - type: ndcg_at_3
671
+ value: 31.239
672
+ - type: ndcg_at_5
673
+ value: 33.302
674
+ - type: precision_at_1
675
+ value: 28.881
676
+ - type: precision_at_10
677
+ value: 6.598
678
+ - type: precision_at_100
679
+ value: 1.1079999999999999
680
+ - type: precision_at_1000
681
+ value: 0.151
682
+ - type: precision_at_3
683
+ value: 14.954
684
+ - type: precision_at_5
685
+ value: 10.776
686
+ - type: recall_at_1
687
+ value: 22.416
688
+ - type: recall_at_10
689
+ value: 46.243
690
+ - type: recall_at_100
691
+ value: 71.352
692
+ - type: recall_at_1000
693
+ value: 90.034
694
+ - type: recall_at_3
695
+ value: 32.873000000000005
696
+ - type: recall_at_5
697
+ value: 38.632
698
+ - task:
699
+ type: Retrieval
700
+ dataset:
701
+ type: BeIR/cqadupstack
702
+ name: MTEB CQADupstackRetrieval
703
+ config: default
704
+ split: test
705
+ revision: None
706
+ metrics:
707
+ - type: map_at_1
708
+ value: 22.528166666666667
709
+ - type: map_at_10
710
+ value: 30.317833333333333
711
+ - type: map_at_100
712
+ value: 31.44108333333333
713
+ - type: map_at_1000
714
+ value: 31.566666666666666
715
+ - type: map_at_3
716
+ value: 27.84425
717
+ - type: map_at_5
718
+ value: 29.233333333333334
719
+ - type: mrr_at_1
720
+ value: 26.75733333333333
721
+ - type: mrr_at_10
722
+ value: 34.24425
723
+ - type: mrr_at_100
724
+ value: 35.11375
725
+ - type: mrr_at_1000
726
+ value: 35.184333333333335
727
+ - type: mrr_at_3
728
+ value: 32.01225
729
+ - type: mrr_at_5
730
+ value: 33.31225
731
+ - type: ndcg_at_1
732
+ value: 26.75733333333333
733
+ - type: ndcg_at_10
734
+ value: 35.072583333333334
735
+ - type: ndcg_at_100
736
+ value: 40.13358333333334
737
+ - type: ndcg_at_1000
738
+ value: 42.81825
739
+ - type: ndcg_at_3
740
+ value: 30.79275000000001
741
+ - type: ndcg_at_5
742
+ value: 32.822
743
+ - type: precision_at_1
744
+ value: 26.75733333333333
745
+ - type: precision_at_10
746
+ value: 6.128083333333334
747
+ - type: precision_at_100
748
+ value: 1.019
749
+ - type: precision_at_1000
750
+ value: 0.14391666666666664
751
+ - type: precision_at_3
752
+ value: 14.129916666666665
753
+ - type: precision_at_5
754
+ value: 10.087416666666668
755
+ - type: recall_at_1
756
+ value: 22.528166666666667
757
+ - type: recall_at_10
758
+ value: 45.38341666666667
759
+ - type: recall_at_100
760
+ value: 67.81791666666668
761
+ - type: recall_at_1000
762
+ value: 86.71716666666666
763
+ - type: recall_at_3
764
+ value: 33.38741666666667
765
+ - type: recall_at_5
766
+ value: 38.62041666666667
767
+ - task:
768
+ type: Retrieval
769
+ dataset:
770
+ type: BeIR/cqadupstack
771
+ name: MTEB CQADupstackStatsRetrieval
772
+ config: default
773
+ split: test
774
+ revision: None
775
+ metrics:
776
+ - type: map_at_1
777
+ value: 21.975
778
+ - type: map_at_10
779
+ value: 28.144999999999996
780
+ - type: map_at_100
781
+ value: 28.994999999999997
782
+ - type: map_at_1000
783
+ value: 29.086000000000002
784
+ - type: map_at_3
785
+ value: 25.968999999999998
786
+ - type: map_at_5
787
+ value: 27.321
788
+ - type: mrr_at_1
789
+ value: 25.0
790
+ - type: mrr_at_10
791
+ value: 30.822
792
+ - type: mrr_at_100
793
+ value: 31.647
794
+ - type: mrr_at_1000
795
+ value: 31.712
796
+ - type: mrr_at_3
797
+ value: 28.860000000000003
798
+ - type: mrr_at_5
799
+ value: 30.041
800
+ - type: ndcg_at_1
801
+ value: 25.0
802
+ - type: ndcg_at_10
803
+ value: 31.929999999999996
804
+ - type: ndcg_at_100
805
+ value: 36.258
806
+ - type: ndcg_at_1000
807
+ value: 38.682
808
+ - type: ndcg_at_3
809
+ value: 27.972
810
+ - type: ndcg_at_5
811
+ value: 30.089
812
+ - type: precision_at_1
813
+ value: 25.0
814
+ - type: precision_at_10
815
+ value: 4.923
816
+ - type: precision_at_100
817
+ value: 0.767
818
+ - type: precision_at_1000
819
+ value: 0.106
820
+ - type: precision_at_3
821
+ value: 11.860999999999999
822
+ - type: precision_at_5
823
+ value: 8.466
824
+ - type: recall_at_1
825
+ value: 21.975
826
+ - type: recall_at_10
827
+ value: 41.102
828
+ - type: recall_at_100
829
+ value: 60.866
830
+ - type: recall_at_1000
831
+ value: 78.781
832
+ - type: recall_at_3
833
+ value: 30.268
834
+ - type: recall_at_5
835
+ value: 35.552
836
+ - task:
837
+ type: Retrieval
838
+ dataset:
839
+ type: BeIR/cqadupstack
840
+ name: MTEB CQADupstackTexRetrieval
841
+ config: default
842
+ split: test
843
+ revision: None
844
+ metrics:
845
+ - type: map_at_1
846
+ value: 15.845999999999998
847
+ - type: map_at_10
848
+ value: 21.861
849
+ - type: map_at_100
850
+ value: 22.798
851
+ - type: map_at_1000
852
+ value: 22.925
853
+ - type: map_at_3
854
+ value: 19.922
855
+ - type: map_at_5
856
+ value: 21.054000000000002
857
+ - type: mrr_at_1
858
+ value: 19.098000000000003
859
+ - type: mrr_at_10
860
+ value: 25.397
861
+ - type: mrr_at_100
862
+ value: 26.246000000000002
863
+ - type: mrr_at_1000
864
+ value: 26.33
865
+ - type: mrr_at_3
866
+ value: 23.469
867
+ - type: mrr_at_5
868
+ value: 24.646
869
+ - type: ndcg_at_1
870
+ value: 19.098000000000003
871
+ - type: ndcg_at_10
872
+ value: 25.807999999999996
873
+ - type: ndcg_at_100
874
+ value: 30.445
875
+ - type: ndcg_at_1000
876
+ value: 33.666000000000004
877
+ - type: ndcg_at_3
878
+ value: 22.292
879
+ - type: ndcg_at_5
880
+ value: 24.075
881
+ - type: precision_at_1
882
+ value: 19.098000000000003
883
+ - type: precision_at_10
884
+ value: 4.58
885
+ - type: precision_at_100
886
+ value: 0.8099999999999999
887
+ - type: precision_at_1000
888
+ value: 0.126
889
+ - type: precision_at_3
890
+ value: 10.346
891
+ - type: precision_at_5
892
+ value: 7.542999999999999
893
+ - type: recall_at_1
894
+ value: 15.845999999999998
895
+ - type: recall_at_10
896
+ value: 34.172999999999995
897
+ - type: recall_at_100
898
+ value: 55.24099999999999
899
+ - type: recall_at_1000
900
+ value: 78.644
901
+ - type: recall_at_3
902
+ value: 24.401
903
+ - type: recall_at_5
904
+ value: 28.938000000000002
905
+ - task:
906
+ type: Retrieval
907
+ dataset:
908
+ type: BeIR/cqadupstack
909
+ name: MTEB CQADupstackUnixRetrieval
910
+ config: default
911
+ split: test
912
+ revision: None
913
+ metrics:
914
+ - type: map_at_1
915
+ value: 22.974
916
+ - type: map_at_10
917
+ value: 30.108
918
+ - type: map_at_100
919
+ value: 31.208000000000002
920
+ - type: map_at_1000
921
+ value: 31.330999999999996
922
+ - type: map_at_3
923
+ value: 27.889999999999997
924
+ - type: map_at_5
925
+ value: 29.023
926
+ - type: mrr_at_1
927
+ value: 26.493
928
+ - type: mrr_at_10
929
+ value: 33.726
930
+ - type: mrr_at_100
931
+ value: 34.622
932
+ - type: mrr_at_1000
933
+ value: 34.703
934
+ - type: mrr_at_3
935
+ value: 31.575999999999997
936
+ - type: mrr_at_5
937
+ value: 32.690999999999995
938
+ - type: ndcg_at_1
939
+ value: 26.493
940
+ - type: ndcg_at_10
941
+ value: 34.664
942
+ - type: ndcg_at_100
943
+ value: 39.725
944
+ - type: ndcg_at_1000
945
+ value: 42.648
946
+ - type: ndcg_at_3
947
+ value: 30.447999999999997
948
+ - type: ndcg_at_5
949
+ value: 32.145
950
+ - type: precision_at_1
951
+ value: 26.493
952
+ - type: precision_at_10
953
+ value: 5.7090000000000005
954
+ - type: precision_at_100
955
+ value: 0.9199999999999999
956
+ - type: precision_at_1000
957
+ value: 0.129
958
+ - type: precision_at_3
959
+ value: 13.464
960
+ - type: precision_at_5
961
+ value: 9.384
962
+ - type: recall_at_1
963
+ value: 22.974
964
+ - type: recall_at_10
965
+ value: 45.097
966
+ - type: recall_at_100
967
+ value: 66.908
968
+ - type: recall_at_1000
969
+ value: 87.495
970
+ - type: recall_at_3
971
+ value: 33.338
972
+ - type: recall_at_5
973
+ value: 37.499
974
+ - task:
975
+ type: Retrieval
976
+ dataset:
977
+ type: BeIR/cqadupstack
978
+ name: MTEB CQADupstackWebmastersRetrieval
979
+ config: default
980
+ split: test
981
+ revision: None
982
+ metrics:
983
+ - type: map_at_1
984
+ value: 22.408
985
+ - type: map_at_10
986
+ value: 29.580000000000002
987
+ - type: map_at_100
988
+ value: 31.145
989
+ - type: map_at_1000
990
+ value: 31.369000000000003
991
+ - type: map_at_3
992
+ value: 27.634999999999998
993
+ - type: map_at_5
994
+ value: 28.766000000000002
995
+ - type: mrr_at_1
996
+ value: 27.272999999999996
997
+ - type: mrr_at_10
998
+ value: 33.93
999
+ - type: mrr_at_100
1000
+ value: 34.963
1001
+ - type: mrr_at_1000
1002
+ value: 35.031
1003
+ - type: mrr_at_3
1004
+ value: 32.016
1005
+ - type: mrr_at_5
1006
+ value: 33.221000000000004
1007
+ - type: ndcg_at_1
1008
+ value: 27.272999999999996
1009
+ - type: ndcg_at_10
1010
+ value: 33.993
1011
+ - type: ndcg_at_100
1012
+ value: 40.333999999999996
1013
+ - type: ndcg_at_1000
1014
+ value: 43.361
1015
+ - type: ndcg_at_3
1016
+ value: 30.918
1017
+ - type: ndcg_at_5
1018
+ value: 32.552
1019
+ - type: precision_at_1
1020
+ value: 27.272999999999996
1021
+ - type: precision_at_10
1022
+ value: 6.285
1023
+ - type: precision_at_100
1024
+ value: 1.389
1025
+ - type: precision_at_1000
1026
+ value: 0.232
1027
+ - type: precision_at_3
1028
+ value: 14.427000000000001
1029
+ - type: precision_at_5
1030
+ value: 10.356
1031
+ - type: recall_at_1
1032
+ value: 22.408
1033
+ - type: recall_at_10
1034
+ value: 41.318
1035
+ - type: recall_at_100
1036
+ value: 70.539
1037
+ - type: recall_at_1000
1038
+ value: 90.197
1039
+ - type: recall_at_3
1040
+ value: 32.513
1041
+ - type: recall_at_5
1042
+ value: 37.0
1043
+ - task:
1044
+ type: Retrieval
1045
+ dataset:
1046
+ type: BeIR/cqadupstack
1047
+ name: MTEB CQADupstackWordpressRetrieval
1048
+ config: default
1049
+ split: test
1050
+ revision: None
1051
+ metrics:
1052
+ - type: map_at_1
1053
+ value: 17.258000000000003
1054
+ - type: map_at_10
1055
+ value: 24.294
1056
+ - type: map_at_100
1057
+ value: 25.305
1058
+ - type: map_at_1000
1059
+ value: 25.419999999999998
1060
+ - type: map_at_3
1061
+ value: 22.326999999999998
1062
+ - type: map_at_5
1063
+ value: 23.31
1064
+ - type: mrr_at_1
1065
+ value: 18.484
1066
+ - type: mrr_at_10
1067
+ value: 25.863999999999997
1068
+ - type: mrr_at_100
1069
+ value: 26.766000000000002
1070
+ - type: mrr_at_1000
1071
+ value: 26.855
1072
+ - type: mrr_at_3
1073
+ value: 23.968
1074
+ - type: mrr_at_5
1075
+ value: 24.911
1076
+ - type: ndcg_at_1
1077
+ value: 18.484
1078
+ - type: ndcg_at_10
1079
+ value: 28.433000000000003
1080
+ - type: ndcg_at_100
1081
+ value: 33.405
1082
+ - type: ndcg_at_1000
1083
+ value: 36.375
1084
+ - type: ndcg_at_3
1085
+ value: 24.455
1086
+ - type: ndcg_at_5
1087
+ value: 26.031
1088
+ - type: precision_at_1
1089
+ value: 18.484
1090
+ - type: precision_at_10
1091
+ value: 4.603
1092
+ - type: precision_at_100
1093
+ value: 0.773
1094
+ - type: precision_at_1000
1095
+ value: 0.11299999999999999
1096
+ - type: precision_at_3
1097
+ value: 10.659
1098
+ - type: precision_at_5
1099
+ value: 7.505000000000001
1100
+ - type: recall_at_1
1101
+ value: 17.258000000000003
1102
+ - type: recall_at_10
1103
+ value: 39.589999999999996
1104
+ - type: recall_at_100
1105
+ value: 62.592000000000006
1106
+ - type: recall_at_1000
1107
+ value: 84.917
1108
+ - type: recall_at_3
1109
+ value: 28.706
1110
+ - type: recall_at_5
1111
+ value: 32.224000000000004
1112
+ - task:
1113
+ type: Retrieval
1114
+ dataset:
1115
+ type: climate-fever
1116
+ name: MTEB ClimateFEVER
1117
+ config: default
1118
+ split: test
1119
+ revision: None
1120
+ metrics:
1121
+ - type: map_at_1
1122
+ value: 10.578999999999999
1123
+ - type: map_at_10
1124
+ value: 17.642
1125
+ - type: map_at_100
1126
+ value: 19.451
1127
+ - type: map_at_1000
1128
+ value: 19.647000000000002
1129
+ - type: map_at_3
1130
+ value: 14.618
1131
+ - type: map_at_5
1132
+ value: 16.145
1133
+ - type: mrr_at_1
1134
+ value: 23.322000000000003
1135
+ - type: mrr_at_10
1136
+ value: 34.204
1137
+ - type: mrr_at_100
1138
+ value: 35.185
1139
+ - type: mrr_at_1000
1140
+ value: 35.235
1141
+ - type: mrr_at_3
1142
+ value: 30.847
1143
+ - type: mrr_at_5
1144
+ value: 32.824
1145
+ - type: ndcg_at_1
1146
+ value: 23.322000000000003
1147
+ - type: ndcg_at_10
1148
+ value: 25.352999999999998
1149
+ - type: ndcg_at_100
1150
+ value: 32.574
1151
+ - type: ndcg_at_1000
1152
+ value: 36.073
1153
+ - type: ndcg_at_3
1154
+ value: 20.318
1155
+ - type: ndcg_at_5
1156
+ value: 22.111
1157
+ - type: precision_at_1
1158
+ value: 23.322000000000003
1159
+ - type: precision_at_10
1160
+ value: 8.02
1161
+ - type: precision_at_100
1162
+ value: 1.5730000000000002
1163
+ - type: precision_at_1000
1164
+ value: 0.22200000000000003
1165
+ - type: precision_at_3
1166
+ value: 15.049000000000001
1167
+ - type: precision_at_5
1168
+ value: 11.87
1169
+ - type: recall_at_1
1170
+ value: 10.578999999999999
1171
+ - type: recall_at_10
1172
+ value: 30.964999999999996
1173
+ - type: recall_at_100
1174
+ value: 55.986000000000004
1175
+ - type: recall_at_1000
1176
+ value: 75.565
1177
+ - type: recall_at_3
1178
+ value: 18.686
1179
+ - type: recall_at_5
1180
+ value: 23.629
1181
+ - task:
1182
+ type: Retrieval
1183
+ dataset:
1184
+ type: dbpedia-entity
1185
+ name: MTEB DBPedia
1186
+ config: default
1187
+ split: test
1188
+ revision: None
1189
+ metrics:
1190
+ - type: map_at_1
1191
+ value: 7.327
1192
+ - type: map_at_10
1193
+ value: 14.904
1194
+ - type: map_at_100
1195
+ value: 20.29
1196
+ - type: map_at_1000
1197
+ value: 21.42
1198
+ - type: map_at_3
1199
+ value: 10.911
1200
+ - type: map_at_5
1201
+ value: 12.791
1202
+ - type: mrr_at_1
1203
+ value: 57.25
1204
+ - type: mrr_at_10
1205
+ value: 66.62700000000001
1206
+ - type: mrr_at_100
1207
+ value: 67.035
1208
+ - type: mrr_at_1000
1209
+ value: 67.052
1210
+ - type: mrr_at_3
1211
+ value: 64.833
1212
+ - type: mrr_at_5
1213
+ value: 65.908
1214
+ - type: ndcg_at_1
1215
+ value: 43.75
1216
+ - type: ndcg_at_10
1217
+ value: 32.246
1218
+ - type: ndcg_at_100
1219
+ value: 35.774
1220
+ - type: ndcg_at_1000
1221
+ value: 42.872
1222
+ - type: ndcg_at_3
1223
+ value: 36.64
1224
+ - type: ndcg_at_5
1225
+ value: 34.487
1226
+ - type: precision_at_1
1227
+ value: 57.25
1228
+ - type: precision_at_10
1229
+ value: 25.924999999999997
1230
+ - type: precision_at_100
1231
+ value: 7.670000000000001
1232
+ - type: precision_at_1000
1233
+ value: 1.599
1234
+ - type: precision_at_3
1235
+ value: 41.167
1236
+ - type: precision_at_5
1237
+ value: 34.65
1238
+ - type: recall_at_1
1239
+ value: 7.327
1240
+ - type: recall_at_10
1241
+ value: 19.625
1242
+ - type: recall_at_100
1243
+ value: 41.601
1244
+ - type: recall_at_1000
1245
+ value: 65.117
1246
+ - type: recall_at_3
1247
+ value: 12.308
1248
+ - type: recall_at_5
1249
+ value: 15.437999999999999
1250
+ - task:
1251
+ type: Classification
1252
+ dataset:
1253
+ type: mteb/emotion
1254
+ name: MTEB EmotionClassification
1255
+ config: default
1256
+ split: test
1257
+ revision: 4f58c6b202a23cf9a4da393831edf4f9183cad37
1258
+ metrics:
1259
+ - type: accuracy
1260
+ value: 44.53
1261
+ - type: f1
1262
+ value: 39.39884255816736
1263
+ - task:
1264
+ type: Retrieval
1265
+ dataset:
1266
+ type: fever
1267
+ name: MTEB FEVER
1268
+ config: default
1269
+ split: test
1270
+ revision: None
1271
+ metrics:
1272
+ - type: map_at_1
1273
+ value: 58.913000000000004
1274
+ - type: map_at_10
1275
+ value: 69.592
1276
+ - type: map_at_100
1277
+ value: 69.95599999999999
1278
+ - type: map_at_1000
1279
+ value: 69.973
1280
+ - type: map_at_3
1281
+ value: 67.716
1282
+ - type: map_at_5
1283
+ value: 68.899
1284
+ - type: mrr_at_1
1285
+ value: 63.561
1286
+ - type: mrr_at_10
1287
+ value: 74.2
1288
+ - type: mrr_at_100
1289
+ value: 74.468
1290
+ - type: mrr_at_1000
1291
+ value: 74.47500000000001
1292
+ - type: mrr_at_3
1293
+ value: 72.442
1294
+ - type: mrr_at_5
1295
+ value: 73.58
1296
+ - type: ndcg_at_1
1297
+ value: 63.561
1298
+ - type: ndcg_at_10
1299
+ value: 74.988
1300
+ - type: ndcg_at_100
1301
+ value: 76.52799999999999
1302
+ - type: ndcg_at_1000
1303
+ value: 76.88000000000001
1304
+ - type: ndcg_at_3
1305
+ value: 71.455
1306
+ - type: ndcg_at_5
1307
+ value: 73.42699999999999
1308
+ - type: precision_at_1
1309
+ value: 63.561
1310
+ - type: precision_at_10
1311
+ value: 9.547
1312
+ - type: precision_at_100
1313
+ value: 1.044
1314
+ - type: precision_at_1000
1315
+ value: 0.109
1316
+ - type: precision_at_3
1317
+ value: 28.143
1318
+ - type: precision_at_5
1319
+ value: 18.008
1320
+ - type: recall_at_1
1321
+ value: 58.913000000000004
1322
+ - type: recall_at_10
1323
+ value: 87.18
1324
+ - type: recall_at_100
1325
+ value: 93.852
1326
+ - type: recall_at_1000
1327
+ value: 96.256
1328
+ - type: recall_at_3
1329
+ value: 77.55199999999999
1330
+ - type: recall_at_5
1331
+ value: 82.42399999999999
1332
+ - task:
1333
+ type: Retrieval
1334
+ dataset:
1335
+ type: fiqa
1336
+ name: MTEB FiQA2018
1337
+ config: default
1338
+ split: test
1339
+ revision: None
1340
+ metrics:
1341
+ - type: map_at_1
1342
+ value: 11.761000000000001
1343
+ - type: map_at_10
1344
+ value: 19.564999999999998
1345
+ - type: map_at_100
1346
+ value: 21.099
1347
+ - type: map_at_1000
1348
+ value: 21.288999999999998
1349
+ - type: map_at_3
1350
+ value: 16.683999999999997
1351
+ - type: map_at_5
1352
+ value: 18.307000000000002
1353
+ - type: mrr_at_1
1354
+ value: 23.302
1355
+ - type: mrr_at_10
1356
+ value: 30.979
1357
+ - type: mrr_at_100
1358
+ value: 32.121
1359
+ - type: mrr_at_1000
1360
+ value: 32.186
1361
+ - type: mrr_at_3
1362
+ value: 28.549000000000003
1363
+ - type: mrr_at_5
1364
+ value: 30.038999999999998
1365
+ - type: ndcg_at_1
1366
+ value: 23.302
1367
+ - type: ndcg_at_10
1368
+ value: 25.592
1369
+ - type: ndcg_at_100
1370
+ value: 32.416
1371
+ - type: ndcg_at_1000
1372
+ value: 36.277
1373
+ - type: ndcg_at_3
1374
+ value: 22.151
1375
+ - type: ndcg_at_5
1376
+ value: 23.483999999999998
1377
+ - type: precision_at_1
1378
+ value: 23.302
1379
+ - type: precision_at_10
1380
+ value: 7.377000000000001
1381
+ - type: precision_at_100
1382
+ value: 1.415
1383
+ - type: precision_at_1000
1384
+ value: 0.212
1385
+ - type: precision_at_3
1386
+ value: 14.712
1387
+ - type: precision_at_5
1388
+ value: 11.358
1389
+ - type: recall_at_1
1390
+ value: 11.761000000000001
1391
+ - type: recall_at_10
1392
+ value: 31.696
1393
+ - type: recall_at_100
1394
+ value: 58.01500000000001
1395
+ - type: recall_at_1000
1396
+ value: 81.572
1397
+ - type: recall_at_3
1398
+ value: 20.742
1399
+ - type: recall_at_5
1400
+ value: 25.707
1401
+ - task:
1402
+ type: Retrieval
1403
+ dataset:
1404
+ type: hotpotqa
1405
+ name: MTEB HotpotQA
1406
+ config: default
1407
+ split: test
1408
+ revision: None
1409
+ metrics:
1410
+ - type: map_at_1
1411
+ value: 32.275
1412
+ - type: map_at_10
1413
+ value: 44.712
1414
+ - type: map_at_100
1415
+ value: 45.621
1416
+ - type: map_at_1000
1417
+ value: 45.698
1418
+ - type: map_at_3
1419
+ value: 42.016999999999996
1420
+ - type: map_at_5
1421
+ value: 43.659
1422
+ - type: mrr_at_1
1423
+ value: 64.551
1424
+ - type: mrr_at_10
1425
+ value: 71.58099999999999
1426
+ - type: mrr_at_100
1427
+ value: 71.952
1428
+ - type: mrr_at_1000
1429
+ value: 71.96900000000001
1430
+ - type: mrr_at_3
1431
+ value: 70.236
1432
+ - type: mrr_at_5
1433
+ value: 71.051
1434
+ - type: ndcg_at_1
1435
+ value: 64.551
1436
+ - type: ndcg_at_10
1437
+ value: 53.913999999999994
1438
+ - type: ndcg_at_100
1439
+ value: 57.421
1440
+ - type: ndcg_at_1000
1441
+ value: 59.06
1442
+ - type: ndcg_at_3
1443
+ value: 49.716
1444
+ - type: ndcg_at_5
1445
+ value: 51.971999999999994
1446
+ - type: precision_at_1
1447
+ value: 64.551
1448
+ - type: precision_at_10
1449
+ value: 11.110000000000001
1450
+ - type: precision_at_100
1451
+ value: 1.388
1452
+ - type: precision_at_1000
1453
+ value: 0.161
1454
+ - type: precision_at_3
1455
+ value: 30.822
1456
+ - type: precision_at_5
1457
+ value: 20.273
1458
+ - type: recall_at_1
1459
+ value: 32.275
1460
+ - type: recall_at_10
1461
+ value: 55.55
1462
+ - type: recall_at_100
1463
+ value: 69.38600000000001
1464
+ - type: recall_at_1000
1465
+ value: 80.35799999999999
1466
+ - type: recall_at_3
1467
+ value: 46.232
1468
+ - type: recall_at_5
1469
+ value: 50.682
1470
+ - task:
1471
+ type: Classification
1472
+ dataset:
1473
+ type: mteb/imdb
1474
+ name: MTEB ImdbClassification
1475
+ config: default
1476
+ split: test
1477
+ revision: 3d86128a09e091d6018b6d26cad27f2739fc2db7
1478
+ metrics:
1479
+ - type: accuracy
1480
+ value: 76.4604
1481
+ - type: ap
1482
+ value: 70.40498168422701
1483
+ - type: f1
1484
+ value: 76.38572688476046
1485
+ - task:
1486
+ type: Retrieval
1487
+ dataset:
1488
+ type: msmarco
1489
+ name: MTEB MSMARCO
1490
+ config: default
1491
+ split: dev
1492
+ revision: None
1493
+ metrics:
1494
+ - type: map_at_1
1495
+ value: 15.065999999999999
1496
+ - type: map_at_10
1497
+ value: 25.058000000000003
1498
+ - type: map_at_100
1499
+ value: 26.268
1500
+ - type: map_at_1000
1501
+ value: 26.344
1502
+ - type: map_at_3
1503
+ value: 21.626
1504
+ - type: map_at_5
1505
+ value: 23.513
1506
+ - type: mrr_at_1
1507
+ value: 15.501000000000001
1508
+ - type: mrr_at_10
1509
+ value: 25.548
1510
+ - type: mrr_at_100
1511
+ value: 26.723000000000003
1512
+ - type: mrr_at_1000
1513
+ value: 26.793
1514
+ - type: mrr_at_3
1515
+ value: 22.142
1516
+ - type: mrr_at_5
1517
+ value: 24.024
1518
+ - type: ndcg_at_1
1519
+ value: 15.501000000000001
1520
+ - type: ndcg_at_10
1521
+ value: 31.008000000000003
1522
+ - type: ndcg_at_100
1523
+ value: 37.08
1524
+ - type: ndcg_at_1000
1525
+ value: 39.102
1526
+ - type: ndcg_at_3
1527
+ value: 23.921999999999997
1528
+ - type: ndcg_at_5
1529
+ value: 27.307
1530
+ - type: precision_at_1
1531
+ value: 15.501000000000001
1532
+ - type: precision_at_10
1533
+ value: 5.155
1534
+ - type: precision_at_100
1535
+ value: 0.822
1536
+ - type: precision_at_1000
1537
+ value: 0.099
1538
+ - type: precision_at_3
1539
+ value: 10.363
1540
+ - type: precision_at_5
1541
+ value: 7.917000000000001
1542
+ - type: recall_at_1
1543
+ value: 15.065999999999999
1544
+ - type: recall_at_10
1545
+ value: 49.507
1546
+ - type: recall_at_100
1547
+ value: 78.118
1548
+ - type: recall_at_1000
1549
+ value: 93.881
1550
+ - type: recall_at_3
1551
+ value: 30.075000000000003
1552
+ - type: recall_at_5
1553
+ value: 38.222
1554
+ - task:
1555
+ type: Classification
1556
+ dataset:
1557
+ type: mteb/mtop_domain
1558
+ name: MTEB MTOPDomainClassification (en)
1559
+ config: en
1560
+ split: test
1561
+ revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
1562
+ metrics:
1563
+ - type: accuracy
1564
+ value: 90.6703146374829
1565
+ - type: f1
1566
+ value: 90.1258004293966
1567
+ - task:
1568
+ type: Classification
1569
+ dataset:
1570
+ type: mteb/mtop_intent
1571
+ name: MTEB MTOPIntentClassification (en)
1572
+ config: en
1573
+ split: test
1574
+ revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
1575
+ metrics:
1576
+ - type: accuracy
1577
+ value: 68.29229366165072
1578
+ - type: f1
1579
+ value: 50.016194478997875
1580
+ - task:
1581
+ type: Classification
1582
+ dataset:
1583
+ type: mteb/amazon_massive_intent
1584
+ name: MTEB MassiveIntentClassification (en)
1585
+ config: en
1586
+ split: test
1587
+ revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
1588
+ metrics:
1589
+ - type: accuracy
1590
+ value: 68.57767316745124
1591
+ - type: f1
1592
+ value: 67.16194062146954
1593
+ - task:
1594
+ type: Classification
1595
+ dataset:
1596
+ type: mteb/amazon_massive_scenario
1597
+ name: MTEB MassiveScenarioClassification (en)
1598
+ config: en
1599
+ split: test
1600
+ revision: 7d571f92784cd94a019292a1f45445077d0ef634
1601
+ metrics:
1602
+ - type: accuracy
1603
+ value: 73.92064559515804
1604
+ - type: f1
1605
+ value: 73.6680729569968
1606
+ - task:
1607
+ type: Clustering
1608
+ dataset:
1609
+ type: mteb/medrxiv-clustering-p2p
1610
+ name: MTEB MedrxivClusteringP2P
1611
+ config: default
1612
+ split: test
1613
+ revision: e7a26af6f3ae46b30dde8737f02c07b1505bcc73
1614
+ metrics:
1615
+ - type: v_measure
1616
+ value: 31.56335607367883
1617
+ - task:
1618
+ type: Clustering
1619
+ dataset:
1620
+ type: mteb/medrxiv-clustering-s2s
1621
+ name: MTEB MedrxivClusteringS2S
1622
+ config: default
1623
+ split: test
1624
+ revision: 35191c8c0dca72d8ff3efcd72aa802307d469663
1625
+ metrics:
1626
+ - type: v_measure
1627
+ value: 28.131807833734268
1628
+ - task:
1629
+ type: Reranking
1630
+ dataset:
1631
+ type: mteb/mind_small
1632
+ name: MTEB MindSmallReranking
1633
+ config: default
1634
+ split: test
1635
+ revision: 3bdac13927fdc888b903db93b2ffdbd90b295a69
1636
+ metrics:
1637
+ - type: map
1638
+ value: 31.07390328719844
1639
+ - type: mrr
1640
+ value: 32.117370992867905
1641
+ - task:
1642
+ type: Retrieval
1643
+ dataset:
1644
+ type: nfcorpus
1645
+ name: MTEB NFCorpus
1646
+ config: default
1647
+ split: test
1648
+ revision: None
1649
+ metrics:
1650
+ - type: map_at_1
1651
+ value: 5.274
1652
+ - type: map_at_10
1653
+ value: 11.489
1654
+ - type: map_at_100
1655
+ value: 14.518
1656
+ - type: map_at_1000
1657
+ value: 15.914
1658
+ - type: map_at_3
1659
+ value: 8.399
1660
+ - type: map_at_5
1661
+ value: 9.889000000000001
1662
+ - type: mrr_at_1
1663
+ value: 42.724000000000004
1664
+ - type: mrr_at_10
1665
+ value: 51.486
1666
+ - type: mrr_at_100
1667
+ value: 51.941
1668
+ - type: mrr_at_1000
1669
+ value: 51.99
1670
+ - type: mrr_at_3
1671
+ value: 49.278
1672
+ - type: mrr_at_5
1673
+ value: 50.485
1674
+ - type: ndcg_at_1
1675
+ value: 39.938
1676
+ - type: ndcg_at_10
1677
+ value: 31.862000000000002
1678
+ - type: ndcg_at_100
1679
+ value: 29.235
1680
+ - type: ndcg_at_1000
1681
+ value: 37.802
1682
+ - type: ndcg_at_3
1683
+ value: 35.754999999999995
1684
+ - type: ndcg_at_5
1685
+ value: 34.447
1686
+ - type: precision_at_1
1687
+ value: 42.105
1688
+ - type: precision_at_10
1689
+ value: 23.901
1690
+ - type: precision_at_100
1691
+ value: 7.715
1692
+ - type: precision_at_1000
1693
+ value: 2.045
1694
+ - type: precision_at_3
1695
+ value: 33.437
1696
+ - type: precision_at_5
1697
+ value: 29.782999999999998
1698
+ - type: recall_at_1
1699
+ value: 5.274
1700
+ - type: recall_at_10
1701
+ value: 15.351
1702
+ - type: recall_at_100
1703
+ value: 29.791
1704
+ - type: recall_at_1000
1705
+ value: 60.722
1706
+ - type: recall_at_3
1707
+ value: 9.411
1708
+ - type: recall_at_5
1709
+ value: 12.171999999999999
1710
+ - task:
1711
+ type: Retrieval
1712
+ dataset:
1713
+ type: nq
1714
+ name: MTEB NQ
1715
+ config: default
1716
+ split: test
1717
+ revision: None
1718
+ metrics:
1719
+ - type: map_at_1
1720
+ value: 16.099
1721
+ - type: map_at_10
1722
+ value: 27.913
1723
+ - type: map_at_100
1724
+ value: 29.281000000000002
1725
+ - type: map_at_1000
1726
+ value: 29.343999999999998
1727
+ - type: map_at_3
1728
+ value: 23.791
1729
+ - type: map_at_5
1730
+ value: 26.049
1731
+ - type: mrr_at_1
1732
+ value: 18.337
1733
+ - type: mrr_at_10
1734
+ value: 29.953999999999997
1735
+ - type: mrr_at_100
1736
+ value: 31.080999999999996
1737
+ - type: mrr_at_1000
1738
+ value: 31.130000000000003
1739
+ - type: mrr_at_3
1740
+ value: 26.168000000000003
1741
+ - type: mrr_at_5
1742
+ value: 28.277
1743
+ - type: ndcg_at_1
1744
+ value: 18.308
1745
+ - type: ndcg_at_10
1746
+ value: 34.938
1747
+ - type: ndcg_at_100
1748
+ value: 41.125
1749
+ - type: ndcg_at_1000
1750
+ value: 42.708
1751
+ - type: ndcg_at_3
1752
+ value: 26.805
1753
+ - type: ndcg_at_5
1754
+ value: 30.686999999999998
1755
+ - type: precision_at_1
1756
+ value: 18.308
1757
+ - type: precision_at_10
1758
+ value: 6.476999999999999
1759
+ - type: precision_at_100
1760
+ value: 0.9939999999999999
1761
+ - type: precision_at_1000
1762
+ value: 0.11399999999999999
1763
+ - type: precision_at_3
1764
+ value: 12.784999999999998
1765
+ - type: precision_at_5
1766
+ value: 9.878
1767
+ - type: recall_at_1
1768
+ value: 16.099
1769
+ - type: recall_at_10
1770
+ value: 54.63
1771
+ - type: recall_at_100
1772
+ value: 82.24900000000001
1773
+ - type: recall_at_1000
1774
+ value: 94.242
1775
+ - type: recall_at_3
1776
+ value: 33.174
1777
+ - type: recall_at_5
1778
+ value: 42.164
1779
+ - task:
1780
+ type: Retrieval
1781
+ dataset:
1782
+ type: quora
1783
+ name: MTEB QuoraRetrieval
1784
+ config: default
1785
+ split: test
1786
+ revision: None
1787
+ metrics:
1788
+ - type: map_at_1
1789
+ value: 67.947
1790
+ - type: map_at_10
1791
+ value: 81.499
1792
+ - type: map_at_100
1793
+ value: 82.17
1794
+ - type: map_at_1000
1795
+ value: 82.194
1796
+ - type: map_at_3
1797
+ value: 78.567
1798
+ - type: map_at_5
1799
+ value: 80.34400000000001
1800
+ - type: mrr_at_1
1801
+ value: 78.18
1802
+ - type: mrr_at_10
1803
+ value: 85.05
1804
+ - type: mrr_at_100
1805
+ value: 85.179
1806
+ - type: mrr_at_1000
1807
+ value: 85.181
1808
+ - type: mrr_at_3
1809
+ value: 83.91
1810
+ - type: mrr_at_5
1811
+ value: 84.638
1812
+ - type: ndcg_at_1
1813
+ value: 78.2
1814
+ - type: ndcg_at_10
1815
+ value: 85.715
1816
+ - type: ndcg_at_100
1817
+ value: 87.2
1818
+ - type: ndcg_at_1000
1819
+ value: 87.39
1820
+ - type: ndcg_at_3
1821
+ value: 82.572
1822
+ - type: ndcg_at_5
1823
+ value: 84.176
1824
+ - type: precision_at_1
1825
+ value: 78.2
1826
+ - type: precision_at_10
1827
+ value: 12.973
1828
+ - type: precision_at_100
1829
+ value: 1.5010000000000001
1830
+ - type: precision_at_1000
1831
+ value: 0.156
1832
+ - type: precision_at_3
1833
+ value: 35.949999999999996
1834
+ - type: precision_at_5
1835
+ value: 23.62
1836
+ - type: recall_at_1
1837
+ value: 67.947
1838
+ - type: recall_at_10
1839
+ value: 93.804
1840
+ - type: recall_at_100
1841
+ value: 98.971
1842
+ - type: recall_at_1000
1843
+ value: 99.91600000000001
1844
+ - type: recall_at_3
1845
+ value: 84.75399999999999
1846
+ - type: recall_at_5
1847
+ value: 89.32
1848
+ - task:
1849
+ type: Clustering
1850
+ dataset:
1851
+ type: mteb/reddit-clustering
1852
+ name: MTEB RedditClustering
1853
+ config: default
1854
+ split: test
1855
+ revision: 24640382cdbf8abc73003fb0fa6d111a705499eb
1856
+ metrics:
1857
+ - type: v_measure
1858
+ value: 45.457201684255104
1859
+ - task:
1860
+ type: Clustering
1861
+ dataset:
1862
+ type: mteb/reddit-clustering-p2p
1863
+ name: MTEB RedditClusteringP2P
1864
+ config: default
1865
+ split: test
1866
+ revision: 282350215ef01743dc01b456c7f5241fa8937f16
1867
+ metrics:
1868
+ - type: v_measure
1869
+ value: 55.162226937477875
1870
+ - task:
1871
+ type: Retrieval
1872
+ dataset:
1873
+ type: scidocs
1874
+ name: MTEB SCIDOCS
1875
+ config: default
1876
+ split: test
1877
+ revision: None
1878
+ metrics:
1879
+ - type: map_at_1
1880
+ value: 4.173
1881
+ - type: map_at_10
1882
+ value: 10.463000000000001
1883
+ - type: map_at_100
1884
+ value: 12.278
1885
+ - type: map_at_1000
1886
+ value: 12.572
1887
+ - type: map_at_3
1888
+ value: 7.528
1889
+ - type: map_at_5
1890
+ value: 8.863
1891
+ - type: mrr_at_1
1892
+ value: 20.599999999999998
1893
+ - type: mrr_at_10
1894
+ value: 30.422
1895
+ - type: mrr_at_100
1896
+ value: 31.6
1897
+ - type: mrr_at_1000
1898
+ value: 31.663000000000004
1899
+ - type: mrr_at_3
1900
+ value: 27.400000000000002
1901
+ - type: mrr_at_5
1902
+ value: 29.065
1903
+ - type: ndcg_at_1
1904
+ value: 20.599999999999998
1905
+ - type: ndcg_at_10
1906
+ value: 17.687
1907
+ - type: ndcg_at_100
1908
+ value: 25.172
1909
+ - type: ndcg_at_1000
1910
+ value: 30.617
1911
+ - type: ndcg_at_3
1912
+ value: 16.81
1913
+ - type: ndcg_at_5
1914
+ value: 14.499
1915
+ - type: precision_at_1
1916
+ value: 20.599999999999998
1917
+ - type: precision_at_10
1918
+ value: 9.17
1919
+ - type: precision_at_100
1920
+ value: 2.004
1921
+ - type: precision_at_1000
1922
+ value: 0.332
1923
+ - type: precision_at_3
1924
+ value: 15.6
1925
+ - type: precision_at_5
1926
+ value: 12.58
1927
+ - type: recall_at_1
1928
+ value: 4.173
1929
+ - type: recall_at_10
1930
+ value: 18.575
1931
+ - type: recall_at_100
1932
+ value: 40.692
1933
+ - type: recall_at_1000
1934
+ value: 67.467
1935
+ - type: recall_at_3
1936
+ value: 9.488000000000001
1937
+ - type: recall_at_5
1938
+ value: 12.738
1939
+ - task:
1940
+ type: STS
1941
+ dataset:
1942
+ type: mteb/sickr-sts
1943
+ name: MTEB SICK-R
1944
+ config: default
1945
+ split: test
1946
+ revision: a6ea5a8cab320b040a23452cc28066d9beae2cee
1947
+ metrics:
1948
+ - type: cos_sim_pearson
1949
+ value: 81.12603499315416
1950
+ - type: cos_sim_spearman
1951
+ value: 73.62060290948378
1952
+ - type: euclidean_pearson
1953
+ value: 78.14083565781135
1954
+ - type: euclidean_spearman
1955
+ value: 73.16840437541543
1956
+ - type: manhattan_pearson
1957
+ value: 77.92017261109734
1958
+ - type: manhattan_spearman
1959
+ value: 72.8805059949965
1960
+ - task:
1961
+ type: STS
1962
+ dataset:
1963
+ type: mteb/sts12-sts
1964
+ name: MTEB STS12
1965
+ config: default
1966
+ split: test
1967
+ revision: a0d554a64d88156834ff5ae9920b964011b16384
1968
+ metrics:
1969
+ - type: cos_sim_pearson
1970
+ value: 79.75955377133172
1971
+ - type: cos_sim_spearman
1972
+ value: 71.8872633964069
1973
+ - type: euclidean_pearson
1974
+ value: 76.31922068538256
1975
+ - type: euclidean_spearman
1976
+ value: 70.86449661855376
1977
+ - type: manhattan_pearson
1978
+ value: 76.47852229730407
1979
+ - type: manhattan_spearman
1980
+ value: 70.99367421984789
1981
+ - task:
1982
+ type: STS
1983
+ dataset:
1984
+ type: mteb/sts13-sts
1985
+ name: MTEB STS13
1986
+ config: default
1987
+ split: test
1988
+ revision: 7e90230a92c190f1bf69ae9002b8cea547a64cca
1989
+ metrics:
1990
+ - type: cos_sim_pearson
1991
+ value: 78.80762722908158
1992
+ - type: cos_sim_spearman
1993
+ value: 79.84588978756372
1994
+ - type: euclidean_pearson
1995
+ value: 79.8216849781164
1996
+ - type: euclidean_spearman
1997
+ value: 80.22647061695481
1998
+ - type: manhattan_pearson
1999
+ value: 79.56604194112572
2000
+ - type: manhattan_spearman
2001
+ value: 79.96495189862462
2002
+ - task:
2003
+ type: STS
2004
+ dataset:
2005
+ type: mteb/sts14-sts
2006
+ name: MTEB STS14
2007
+ config: default
2008
+ split: test
2009
+ revision: 6031580fec1f6af667f0bd2da0a551cf4f0b2375
2010
+ metrics:
2011
+ - type: cos_sim_pearson
2012
+ value: 80.1012718092742
2013
+ - type: cos_sim_spearman
2014
+ value: 76.86011381793661
2015
+ - type: euclidean_pearson
2016
+ value: 79.94426039862019
2017
+ - type: euclidean_spearman
2018
+ value: 77.36751135465131
2019
+ - type: manhattan_pearson
2020
+ value: 79.87959373304288
2021
+ - type: manhattan_spearman
2022
+ value: 77.37717129004746
2023
+ - task:
2024
+ type: STS
2025
+ dataset:
2026
+ type: mteb/sts15-sts
2027
+ name: MTEB STS15
2028
+ config: default
2029
+ split: test
2030
+ revision: ae752c7c21bf194d8b67fd573edf7ae58183cbe3
2031
+ metrics:
2032
+ - type: cos_sim_pearson
2033
+ value: 83.90618420346104
2034
+ - type: cos_sim_spearman
2035
+ value: 84.77290791243722
2036
+ - type: euclidean_pearson
2037
+ value: 84.64732258073293
2038
+ - type: euclidean_spearman
2039
+ value: 85.21053649543357
2040
+ - type: manhattan_pearson
2041
+ value: 84.61616883522647
2042
+ - type: manhattan_spearman
2043
+ value: 85.19803126766931
2044
+ - task:
2045
+ type: STS
2046
+ dataset:
2047
+ type: mteb/sts16-sts
2048
+ name: MTEB STS16
2049
+ config: default
2050
+ split: test
2051
+ revision: 4d8694f8f0e0100860b497b999b3dbed754a0513
2052
+ metrics:
2053
+ - type: cos_sim_pearson
2054
+ value: 80.52192114059063
2055
+ - type: cos_sim_spearman
2056
+ value: 81.9103244827937
2057
+ - type: euclidean_pearson
2058
+ value: 80.99375176138985
2059
+ - type: euclidean_spearman
2060
+ value: 81.540250641079
2061
+ - type: manhattan_pearson
2062
+ value: 80.84979573396426
2063
+ - type: manhattan_spearman
2064
+ value: 81.3742591621492
2065
+ - task:
2066
+ type: STS
2067
+ dataset:
2068
+ type: mteb/sts17-crosslingual-sts
2069
+ name: MTEB STS17 (en-en)
2070
+ config: en-en
2071
+ split: test
2072
+ revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
2073
+ metrics:
2074
+ - type: cos_sim_pearson
2075
+ value: 85.82166001234197
2076
+ - type: cos_sim_spearman
2077
+ value: 86.81857495659123
2078
+ - type: euclidean_pearson
2079
+ value: 85.72798403202849
2080
+ - type: euclidean_spearman
2081
+ value: 85.70482438950965
2082
+ - type: manhattan_pearson
2083
+ value: 85.51579093130357
2084
+ - type: manhattan_spearman
2085
+ value: 85.41233705379751
2086
+ - task:
2087
+ type: STS
2088
+ dataset:
2089
+ type: mteb/sts22-crosslingual-sts
2090
+ name: MTEB STS22 (en)
2091
+ config: en
2092
+ split: test
2093
+ revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
2094
+ metrics:
2095
+ - type: cos_sim_pearson
2096
+ value: 64.48071151079803
2097
+ - type: cos_sim_spearman
2098
+ value: 65.37838108084044
2099
+ - type: euclidean_pearson
2100
+ value: 64.67378947096257
2101
+ - type: euclidean_spearman
2102
+ value: 65.39187147219869
2103
+ - type: manhattan_pearson
2104
+ value: 65.35487466133208
2105
+ - type: manhattan_spearman
2106
+ value: 65.51328499442272
2107
+ - task:
2108
+ type: STS
2109
+ dataset:
2110
+ type: mteb/stsbenchmark-sts
2111
+ name: MTEB STSBenchmark
2112
+ config: default
2113
+ split: test
2114
+ revision: b0fddb56ed78048fa8b90373c8a3cfc37b684831
2115
+ metrics:
2116
+ - type: cos_sim_pearson
2117
+ value: 82.64702367823314
2118
+ - type: cos_sim_spearman
2119
+ value: 82.49732953181818
2120
+ - type: euclidean_pearson
2121
+ value: 83.05996062475664
2122
+ - type: euclidean_spearman
2123
+ value: 82.28159546751176
2124
+ - type: manhattan_pearson
2125
+ value: 82.98305503664952
2126
+ - type: manhattan_spearman
2127
+ value: 82.18405771943928
2128
+ - task:
2129
+ type: Reranking
2130
+ dataset:
2131
+ type: mteb/scidocs-reranking
2132
+ name: MTEB SciDocsRR
2133
+ config: default
2134
+ split: test
2135
+ revision: d3c5e1fc0b855ab6097bf1cda04dd73947d7caab
2136
+ metrics:
2137
+ - type: map
2138
+ value: 78.5744649318696
2139
+ - type: mrr
2140
+ value: 93.35386291268645
2141
+ - task:
2142
+ type: Retrieval
2143
+ dataset:
2144
+ type: scifact
2145
+ name: MTEB SciFact
2146
+ config: default
2147
+ split: test
2148
+ revision: None
2149
+ metrics:
2150
+ - type: map_at_1
2151
+ value: 52.093999999999994
2152
+ - type: map_at_10
2153
+ value: 61.646
2154
+ - type: map_at_100
2155
+ value: 62.197
2156
+ - type: map_at_1000
2157
+ value: 62.22800000000001
2158
+ - type: map_at_3
2159
+ value: 58.411
2160
+ - type: map_at_5
2161
+ value: 60.585
2162
+ - type: mrr_at_1
2163
+ value: 55.00000000000001
2164
+ - type: mrr_at_10
2165
+ value: 62.690999999999995
2166
+ - type: mrr_at_100
2167
+ value: 63.139
2168
+ - type: mrr_at_1000
2169
+ value: 63.166999999999994
2170
+ - type: mrr_at_3
2171
+ value: 60.111000000000004
2172
+ - type: mrr_at_5
2173
+ value: 61.778
2174
+ - type: ndcg_at_1
2175
+ value: 55.00000000000001
2176
+ - type: ndcg_at_10
2177
+ value: 66.271
2178
+ - type: ndcg_at_100
2179
+ value: 68.879
2180
+ - type: ndcg_at_1000
2181
+ value: 69.722
2182
+ - type: ndcg_at_3
2183
+ value: 60.672000000000004
2184
+ - type: ndcg_at_5
2185
+ value: 63.929
2186
+ - type: precision_at_1
2187
+ value: 55.00000000000001
2188
+ - type: precision_at_10
2189
+ value: 9.0
2190
+ - type: precision_at_100
2191
+ value: 1.043
2192
+ - type: precision_at_1000
2193
+ value: 0.11100000000000002
2194
+ - type: precision_at_3
2195
+ value: 23.555999999999997
2196
+ - type: precision_at_5
2197
+ value: 16.2
2198
+ - type: recall_at_1
2199
+ value: 52.093999999999994
2200
+ - type: recall_at_10
2201
+ value: 79.567
2202
+ - type: recall_at_100
2203
+ value: 91.60000000000001
2204
+ - type: recall_at_1000
2205
+ value: 98.333
2206
+ - type: recall_at_3
2207
+ value: 64.633
2208
+ - type: recall_at_5
2209
+ value: 72.68299999999999
2210
+ - task:
2211
+ type: PairClassification
2212
+ dataset:
2213
+ type: mteb/sprintduplicatequestions-pairclassification
2214
+ name: MTEB SprintDuplicateQuestions
2215
+ config: default
2216
+ split: test
2217
+ revision: d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46
2218
+ metrics:
2219
+ - type: cos_sim_accuracy
2220
+ value: 99.83267326732673
2221
+ - type: cos_sim_ap
2222
+ value: 95.77995366495178
2223
+ - type: cos_sim_f1
2224
+ value: 91.51180311401306
2225
+ - type: cos_sim_precision
2226
+ value: 91.92734611503532
2227
+ - type: cos_sim_recall
2228
+ value: 91.10000000000001
2229
+ - type: dot_accuracy
2230
+ value: 99.63366336633663
2231
+ - type: dot_ap
2232
+ value: 88.53996286967461
2233
+ - type: dot_f1
2234
+ value: 81.06537530266343
2235
+ - type: dot_precision
2236
+ value: 78.59154929577464
2237
+ - type: dot_recall
2238
+ value: 83.7
2239
+ - type: euclidean_accuracy
2240
+ value: 99.82376237623762
2241
+ - type: euclidean_ap
2242
+ value: 95.53192209281187
2243
+ - type: euclidean_f1
2244
+ value: 91.19683481701286
2245
+ - type: euclidean_precision
2246
+ value: 90.21526418786692
2247
+ - type: euclidean_recall
2248
+ value: 92.2
2249
+ - type: manhattan_accuracy
2250
+ value: 99.82376237623762
2251
+ - type: manhattan_ap
2252
+ value: 95.55642082191741
2253
+ - type: manhattan_f1
2254
+ value: 91.16186693147964
2255
+ - type: manhattan_precision
2256
+ value: 90.53254437869822
2257
+ - type: manhattan_recall
2258
+ value: 91.8
2259
+ - type: max_accuracy
2260
+ value: 99.83267326732673
2261
+ - type: max_ap
2262
+ value: 95.77995366495178
2263
+ - type: max_f1
2264
+ value: 91.51180311401306
2265
+ - task:
2266
+ type: Clustering
2267
+ dataset:
2268
+ type: mteb/stackexchange-clustering
2269
+ name: MTEB StackExchangeClustering
2270
+ config: default
2271
+ split: test
2272
+ revision: 6cbc1f7b2bc0622f2e39d2c77fa502909748c259
2273
+ metrics:
2274
+ - type: v_measure
2275
+ value: 54.508462134213474
2276
+ - task:
2277
+ type: Clustering
2278
+ dataset:
2279
+ type: mteb/stackexchange-clustering-p2p
2280
+ name: MTEB StackExchangeClusteringP2P
2281
+ config: default
2282
+ split: test
2283
+ revision: 815ca46b2622cec33ccafc3735d572c266efdb44
2284
+ metrics:
2285
+ - type: v_measure
2286
+ value: 34.06549765184959
2287
+ - task:
2288
+ type: Reranking
2289
+ dataset:
2290
+ type: mteb/stackoverflowdupquestions-reranking
2291
+ name: MTEB StackOverflowDupQuestions
2292
+ config: default
2293
+ split: test
2294
+ revision: e185fbe320c72810689fc5848eb6114e1ef5ec69
2295
+ metrics:
2296
+ - type: map
2297
+ value: 49.43129549466616
2298
+ - type: mrr
2299
+ value: 50.20613169510227
2300
+ - task:
2301
+ type: Summarization
2302
+ dataset:
2303
+ type: mteb/summeval
2304
+ name: MTEB SummEval
2305
+ config: default
2306
+ split: test
2307
+ revision: cda12ad7615edc362dbf25a00fdd61d3b1eaf93c
2308
+ metrics:
2309
+ - type: cos_sim_pearson
2310
+ value: 30.069516173193044
2311
+ - type: cos_sim_spearman
2312
+ value: 29.872498354017353
2313
+ - type: dot_pearson
2314
+ value: 28.80761257516063
2315
+ - type: dot_spearman
2316
+ value: 28.397422678527708
2317
+ - task:
2318
+ type: Retrieval
2319
+ dataset:
2320
+ type: trec-covid
2321
+ name: MTEB TRECCOVID
2322
+ config: default
2323
+ split: test
2324
+ revision: None
2325
+ metrics:
2326
+ - type: map_at_1
2327
+ value: 0.169
2328
+ - type: map_at_10
2329
+ value: 1.208
2330
+ - type: map_at_100
2331
+ value: 5.925
2332
+ - type: map_at_1000
2333
+ value: 14.427000000000001
2334
+ - type: map_at_3
2335
+ value: 0.457
2336
+ - type: map_at_5
2337
+ value: 0.716
2338
+ - type: mrr_at_1
2339
+ value: 64.0
2340
+ - type: mrr_at_10
2341
+ value: 74.075
2342
+ - type: mrr_at_100
2343
+ value: 74.303
2344
+ - type: mrr_at_1000
2345
+ value: 74.303
2346
+ - type: mrr_at_3
2347
+ value: 71.0
2348
+ - type: mrr_at_5
2349
+ value: 72.89999999999999
2350
+ - type: ndcg_at_1
2351
+ value: 57.99999999999999
2352
+ - type: ndcg_at_10
2353
+ value: 50.376
2354
+ - type: ndcg_at_100
2355
+ value: 38.582
2356
+ - type: ndcg_at_1000
2357
+ value: 35.663
2358
+ - type: ndcg_at_3
2359
+ value: 55.592
2360
+ - type: ndcg_at_5
2361
+ value: 53.647999999999996
2362
+ - type: precision_at_1
2363
+ value: 64.0
2364
+ - type: precision_at_10
2365
+ value: 53.2
2366
+ - type: precision_at_100
2367
+ value: 39.6
2368
+ - type: precision_at_1000
2369
+ value: 16.218
2370
+ - type: precision_at_3
2371
+ value: 59.333000000000006
2372
+ - type: precision_at_5
2373
+ value: 57.599999999999994
2374
+ - type: recall_at_1
2375
+ value: 0.169
2376
+ - type: recall_at_10
2377
+ value: 1.423
2378
+ - type: recall_at_100
2379
+ value: 9.049999999999999
2380
+ - type: recall_at_1000
2381
+ value: 34.056999999999995
2382
+ - type: recall_at_3
2383
+ value: 0.48700000000000004
2384
+ - type: recall_at_5
2385
+ value: 0.792
2386
+ - task:
2387
+ type: Retrieval
2388
+ dataset:
2389
+ type: webis-touche2020
2390
+ name: MTEB Touche2020
2391
+ config: default
2392
+ split: test
2393
+ revision: None
2394
+ metrics:
2395
+ - type: map_at_1
2396
+ value: 1.319
2397
+ - type: map_at_10
2398
+ value: 7.112
2399
+ - type: map_at_100
2400
+ value: 12.588
2401
+ - type: map_at_1000
2402
+ value: 14.056
2403
+ - type: map_at_3
2404
+ value: 2.8049999999999997
2405
+ - type: map_at_5
2406
+ value: 4.68
2407
+ - type: mrr_at_1
2408
+ value: 18.367
2409
+ - type: mrr_at_10
2410
+ value: 33.94
2411
+ - type: mrr_at_100
2412
+ value: 35.193000000000005
2413
+ - type: mrr_at_1000
2414
+ value: 35.193000000000005
2415
+ - type: mrr_at_3
2416
+ value: 29.932
2417
+ - type: mrr_at_5
2418
+ value: 32.279
2419
+ - type: ndcg_at_1
2420
+ value: 15.306000000000001
2421
+ - type: ndcg_at_10
2422
+ value: 18.096
2423
+ - type: ndcg_at_100
2424
+ value: 30.512
2425
+ - type: ndcg_at_1000
2426
+ value: 42.148
2427
+ - type: ndcg_at_3
2428
+ value: 17.034
2429
+ - type: ndcg_at_5
2430
+ value: 18.509
2431
+ - type: precision_at_1
2432
+ value: 18.367
2433
+ - type: precision_at_10
2434
+ value: 18.776
2435
+ - type: precision_at_100
2436
+ value: 7.02
2437
+ - type: precision_at_1000
2438
+ value: 1.467
2439
+ - type: precision_at_3
2440
+ value: 19.048000000000002
2441
+ - type: precision_at_5
2442
+ value: 22.041
2443
+ - type: recall_at_1
2444
+ value: 1.319
2445
+ - type: recall_at_10
2446
+ value: 13.748
2447
+ - type: recall_at_100
2448
+ value: 43.972
2449
+ - type: recall_at_1000
2450
+ value: 79.557
2451
+ - type: recall_at_3
2452
+ value: 4.042
2453
+ - type: recall_at_5
2454
+ value: 7.742
2455
+ - task:
2456
+ type: Classification
2457
+ dataset:
2458
+ type: mteb/toxic_conversations_50k
2459
+ name: MTEB ToxicConversationsClassification
2460
+ config: default
2461
+ split: test
2462
+ revision: d7c0de2777da35d6aae2200a62c6e0e5af397c4c
2463
+ metrics:
2464
+ - type: accuracy
2465
+ value: 70.2282
2466
+ - type: ap
2467
+ value: 13.995763859570426
2468
+ - type: f1
2469
+ value: 54.08126256731344
2470
+ - task:
2471
+ type: Classification
2472
+ dataset:
2473
+ type: mteb/tweet_sentiment_extraction
2474
+ name: MTEB TweetSentimentExtractionClassification
2475
+ config: default
2476
+ split: test
2477
+ revision: d604517c81ca91fe16a244d1248fc021f9ecee7a
2478
+ metrics:
2479
+ - type: accuracy
2480
+ value: 57.64006791171477
2481
+ - type: f1
2482
+ value: 57.95841320748957
2483
+ - task:
2484
+ type: Clustering
2485
+ dataset:
2486
+ type: mteb/twentynewsgroups-clustering
2487
+ name: MTEB TwentyNewsgroupsClustering
2488
+ config: default
2489
+ split: test
2490
+ revision: 6125ec4e24fa026cec8a478383ee943acfbd5449
2491
+ metrics:
2492
+ - type: v_measure
2493
+ value: 40.19267841788564
2494
+ - task:
2495
+ type: PairClassification
2496
+ dataset:
2497
+ type: mteb/twittersemeval2015-pairclassification
2498
+ name: MTEB TwitterSemEval2015
2499
+ config: default
2500
+ split: test
2501
+ revision: 70970daeab8776df92f5ea462b6173c0b46fd2d1
2502
+ metrics:
2503
+ - type: cos_sim_accuracy
2504
+ value: 83.96614412588663
2505
+ - type: cos_sim_ap
2506
+ value: 67.75985678572738
2507
+ - type: cos_sim_f1
2508
+ value: 64.04661542276222
2509
+ - type: cos_sim_precision
2510
+ value: 60.406922357343305
2511
+ - type: cos_sim_recall
2512
+ value: 68.15303430079156
2513
+ - type: dot_accuracy
2514
+ value: 79.5732252488526
2515
+ - type: dot_ap
2516
+ value: 51.30562107572645
2517
+ - type: dot_f1
2518
+ value: 53.120759837177744
2519
+ - type: dot_precision
2520
+ value: 46.478037198258804
2521
+ - type: dot_recall
2522
+ value: 61.97889182058047
2523
+ - type: euclidean_accuracy
2524
+ value: 84.00786791440663
2525
+ - type: euclidean_ap
2526
+ value: 67.58930214486998
2527
+ - type: euclidean_f1
2528
+ value: 64.424821579775
2529
+ - type: euclidean_precision
2530
+ value: 59.4817958454322
2531
+ - type: euclidean_recall
2532
+ value: 70.26385224274406
2533
+ - type: manhattan_accuracy
2534
+ value: 83.87673600762949
2535
+ - type: manhattan_ap
2536
+ value: 67.4250981523309
2537
+ - type: manhattan_f1
2538
+ value: 64.10286658015808
2539
+ - type: manhattan_precision
2540
+ value: 57.96885001066781
2541
+ - type: manhattan_recall
2542
+ value: 71.68865435356201
2543
+ - type: max_accuracy
2544
+ value: 84.00786791440663
2545
+ - type: max_ap
2546
+ value: 67.75985678572738
2547
+ - type: max_f1
2548
+ value: 64.424821579775
2549
+ - task:
2550
+ type: PairClassification
2551
+ dataset:
2552
+ type: mteb/twitterurlcorpus-pairclassification
2553
+ name: MTEB TwitterURLCorpus
2554
+ config: default
2555
+ split: test
2556
+ revision: 8b6510b0b1fa4e4c4f879467980e9be563ec1cdf
2557
+ metrics:
2558
+ - type: cos_sim_accuracy
2559
+ value: 88.41347459929368
2560
+ - type: cos_sim_ap
2561
+ value: 84.89261930113058
2562
+ - type: cos_sim_f1
2563
+ value: 77.13677607258877
2564
+ - type: cos_sim_precision
2565
+ value: 74.88581164358733
2566
+ - type: cos_sim_recall
2567
+ value: 79.52725592854944
2568
+ - type: dot_accuracy
2569
+ value: 86.32359219156285
2570
+ - type: dot_ap
2571
+ value: 79.29794992131094
2572
+ - type: dot_f1
2573
+ value: 72.84356337679777
2574
+ - type: dot_precision
2575
+ value: 67.31761478675462
2576
+ - type: dot_recall
2577
+ value: 79.35786880197105
2578
+ - type: euclidean_accuracy
2579
+ value: 88.33585593976791
2580
+ - type: euclidean_ap
2581
+ value: 84.73257641312746
2582
+ - type: euclidean_f1
2583
+ value: 76.83529582788195
2584
+ - type: euclidean_precision
2585
+ value: 72.76294052863436
2586
+ - type: euclidean_recall
2587
+ value: 81.3905143209116
2588
+ - type: manhattan_accuracy
2589
+ value: 88.3086894089339
2590
+ - type: manhattan_ap
2591
+ value: 84.66304891729399
2592
+ - type: manhattan_f1
2593
+ value: 76.8181650632165
2594
+ - type: manhattan_precision
2595
+ value: 73.6864436744219
2596
+ - type: manhattan_recall
2597
+ value: 80.22790267939637
2598
+ - type: max_accuracy
2599
+ value: 88.41347459929368
2600
+ - type: max_ap
2601
+ value: 84.89261930113058
2602
+ - type: max_f1
2603
+ value: 77.13677607258877
2604
+ ---
2605
+
2606
+ # bge-micro-v2
2607
+
2608
+ This is a [sentence-transformers](https://www.SBERT.net) model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search.
2609
+
2610
+ Distilled in a 2-step training process (bge-micro was step 1) from `BAAI/bge-small-en-v1.5`.
2611
+
2612
+ ## Usage (Sentence-Transformers)
2613
+
2614
+ Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
2615
+
2616
+ ```
2617
+ pip install -U sentence-transformers
2618
+ ```
2619
+
2620
+ Then you can use the model like this:
2621
+
2622
+ ```python
2623
+ from sentence_transformers import SentenceTransformer
2624
+ sentences = ["This is an example sentence", "Each sentence is converted"]
2625
+
2626
+ model = SentenceTransformer('{MODEL_NAME}')
2627
+ embeddings = model.encode(sentences)
2628
+ print(embeddings)
2629
+ ```
2630
+
2631
+
2632
+
2633
+ ## Usage (HuggingFace Transformers)
2634
+ Without [sentence-transformers](https://www.SBERT.net), you can use the model like this: First, you pass your input through the transformer model, then you have to apply the right pooling-operation on-top of the contextualized word embeddings.
2635
+
2636
+ ```python
2637
+ from transformers import AutoTokenizer, AutoModel
2638
+ import torch
2639
+
2640
+
2641
+ #Mean Pooling - Take attention mask into account for correct averaging
2642
+ def mean_pooling(model_output, attention_mask):
2643
+ token_embeddings = model_output[0] #First element of model_output contains all token embeddings
2644
+ input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
2645
+ return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
2646
+
2647
+
2648
+ # Sentences we want sentence embeddings for
2649
+ sentences = ['This is an example sentence', 'Each sentence is converted']
2650
+
2651
+ # Load model from HuggingFace Hub
2652
+ tokenizer = AutoTokenizer.from_pretrained('{MODEL_NAME}')
2653
+ model = AutoModel.from_pretrained('{MODEL_NAME}')
2654
+
2655
+ # Tokenize sentences
2656
+ encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
2657
+
2658
+ # Compute token embeddings
2659
+ with torch.no_grad():
2660
+ model_output = model(**encoded_input)
2661
+
2662
+ # Perform pooling. In this case, mean pooling.
2663
+ sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
2664
+
2665
+ print("Sentence embeddings:")
2666
+ print(sentence_embeddings)
2667
+ ```
2668
+
2669
+
2670
+
2671
+ ## Evaluation Results
2672
+
2673
+ <!--- Describe how your model was evaluated -->
2674
+
2675
+ For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name={MODEL_NAME})
2676
+
2677
+
2678
+
2679
+ ## Full Model Architecture
2680
+ ```
2681
+ SentenceTransformer(
2682
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
2683
+ (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
2684
+ )
2685
+ ```
2686
+
2687
+ ## Citing & Authors
2688
+
2689
+ <!--- Describe where people can find more information -->
snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/jupyter-wb536061/.cache/torch/sentence_transformers/TaylorAI_bge-micro-v2/",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 384,
11
+ "id2label": {
12
+ "0": "LABEL_0"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 1536,
16
+ "label2id": {
17
+ "LABEL_0": 0
18
+ },
19
+ "layer_norm_eps": 1e-12,
20
+ "max_position_embeddings": 512,
21
+ "model_type": "bert",
22
+ "num_attention_heads": 12,
23
+ "num_hidden_layers": 3,
24
+ "pad_token_id": 0,
25
+ "position_embedding_type": "absolute",
26
+ "torch_dtype": "float32",
27
+ "transformers_version": "4.36.2",
28
+ "type_vocab_size": 2,
29
+ "use_cache": true,
30
+ "vocab_size": 30522
31
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/config_sentence_transformers.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "2.2.2",
4
+ "transformers": "4.34.0",
5
+ "pytorch": "2.0.1+cu118"
6
+ }
7
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5470e7d97cd3ddeef35e3519c36cefd133e80b4ba1f04636412d455428c120d1
3
+ size 69565312
snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/special_tokens_map.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "[PAD]",
4
+ "[UNK]",
5
+ "[CLS]",
6
+ "[SEP]",
7
+ "[MASK]"
8
+ ],
9
+ "cls_token": {
10
+ "content": "[CLS]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "mask_token": {
17
+ "content": "[MASK]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "pad_token": {
24
+ "content": "[PAD]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "sep_token": {
31
+ "content": "[SEP]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "unk_token": {
38
+ "content": "[UNK]",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ }
44
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/tokenizer_config.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "additional_special_tokens": [
45
+ "[PAD]",
46
+ "[UNK]",
47
+ "[CLS]",
48
+ "[SEP]",
49
+ "[MASK]"
50
+ ],
51
+ "clean_up_tokenization_spaces": true,
52
+ "cls_token": "[CLS]",
53
+ "do_basic_tokenize": true,
54
+ "do_lower_case": true,
55
+ "mask_token": "[MASK]",
56
+ "max_length": 512,
57
+ "model_max_length": 1000000000000000019884624838656,
58
+ "never_split": null,
59
+ "pad_to_multiple_of": null,
60
+ "pad_token": "[PAD]",
61
+ "pad_token_type_id": 0,
62
+ "padding_side": "right",
63
+ "sep_token": "[SEP]",
64
+ "stride": 0,
65
+ "strip_accents": null,
66
+ "tokenize_chinese_chars": true,
67
+ "tokenizer_class": "BertTokenizer",
68
+ "truncation_side": "right",
69
+ "truncation_strategy": "longest_first",
70
+ "unk_token": "[UNK]"
71
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_00_TaylorAI_bge-micro-v2/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/1_Pooling/config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false
7
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/README.md ADDED
@@ -0,0 +1,3012 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - sentence-transformers
4
+ - feature-extraction
5
+ - sentence-similarity
6
+ - transformers
7
+ - mteb
8
+ model-index:
9
+ - name: bge-small-en-v1.5-angle
10
+ results:
11
+ - task:
12
+ type: Classification
13
+ dataset:
14
+ type: mteb/amazon_counterfactual
15
+ name: MTEB AmazonCounterfactualClassification (en)
16
+ config: en
17
+ split: test
18
+ revision: e8379541af4e31359cca9fbcf4b00f2671dba205
19
+ metrics:
20
+ - type: accuracy
21
+ value: 73.79104477611939
22
+ - type: ap
23
+ value: 37.21923821573361
24
+ - type: f1
25
+ value: 68.0914945617093
26
+ - task:
27
+ type: Classification
28
+ dataset:
29
+ type: mteb/amazon_polarity
30
+ name: MTEB AmazonPolarityClassification
31
+ config: default
32
+ split: test
33
+ revision: e2d317d38cd51312af73b3d32a06d1a08b442046
34
+ metrics:
35
+ - type: accuracy
36
+ value: 92.75377499999999
37
+ - type: ap
38
+ value: 89.46766124546022
39
+ - type: f1
40
+ value: 92.73884001331487
41
+ - task:
42
+ type: Classification
43
+ dataset:
44
+ type: mteb/amazon_reviews_multi
45
+ name: MTEB AmazonReviewsClassification (en)
46
+ config: en
47
+ split: test
48
+ revision: 1399c76144fd37290681b995c656ef9b2e06e26d
49
+ metrics:
50
+ - type: accuracy
51
+ value: 46.986
52
+ - type: f1
53
+ value: 46.55936786727896
54
+ - task:
55
+ type: Retrieval
56
+ dataset:
57
+ type: arguana
58
+ name: MTEB ArguAna
59
+ config: default
60
+ split: test
61
+ revision: None
62
+ metrics:
63
+ - type: map_at_1
64
+ value: 35.846000000000004
65
+ - type: map_at_10
66
+ value: 51.388
67
+ - type: map_at_100
68
+ value: 52.132999999999996
69
+ - type: map_at_1000
70
+ value: 52.141000000000005
71
+ - type: map_at_3
72
+ value: 47.037
73
+ - type: map_at_5
74
+ value: 49.579
75
+ - type: mrr_at_1
76
+ value: 36.558
77
+ - type: mrr_at_10
78
+ value: 51.658
79
+ - type: mrr_at_100
80
+ value: 52.402
81
+ - type: mrr_at_1000
82
+ value: 52.410000000000004
83
+ - type: mrr_at_3
84
+ value: 47.345
85
+ - type: mrr_at_5
86
+ value: 49.797999999999995
87
+ - type: ndcg_at_1
88
+ value: 35.846000000000004
89
+ - type: ndcg_at_10
90
+ value: 59.550000000000004
91
+ - type: ndcg_at_100
92
+ value: 62.596
93
+ - type: ndcg_at_1000
94
+ value: 62.759
95
+ - type: ndcg_at_3
96
+ value: 50.666999999999994
97
+ - type: ndcg_at_5
98
+ value: 55.228
99
+ - type: precision_at_1
100
+ value: 35.846000000000004
101
+ - type: precision_at_10
102
+ value: 8.542
103
+ - type: precision_at_100
104
+ value: 0.984
105
+ - type: precision_at_1000
106
+ value: 0.1
107
+ - type: precision_at_3
108
+ value: 20.389
109
+ - type: precision_at_5
110
+ value: 14.438
111
+ - type: recall_at_1
112
+ value: 35.846000000000004
113
+ - type: recall_at_10
114
+ value: 85.42
115
+ - type: recall_at_100
116
+ value: 98.43499999999999
117
+ - type: recall_at_1000
118
+ value: 99.644
119
+ - type: recall_at_3
120
+ value: 61.166
121
+ - type: recall_at_5
122
+ value: 72.191
123
+ - task:
124
+ type: Clustering
125
+ dataset:
126
+ type: mteb/arxiv-clustering-p2p
127
+ name: MTEB ArxivClusteringP2P
128
+ config: default
129
+ split: test
130
+ revision: a122ad7f3f0291bf49cc6f4d32aa80929df69d5d
131
+ metrics:
132
+ - type: v_measure
133
+ value: 47.402770198163594
134
+ - task:
135
+ type: Clustering
136
+ dataset:
137
+ type: mteb/arxiv-clustering-s2s
138
+ name: MTEB ArxivClusteringS2S
139
+ config: default
140
+ split: test
141
+ revision: f910caf1a6075f7329cdf8c1a6135696f37dbd53
142
+ metrics:
143
+ - type: v_measure
144
+ value: 40.01545436974177
145
+ - task:
146
+ type: Reranking
147
+ dataset:
148
+ type: mteb/askubuntudupquestions-reranking
149
+ name: MTEB AskUbuntuDupQuestions
150
+ config: default
151
+ split: test
152
+ revision: 2000358ca161889fa9c082cb41daa8dcfb161a54
153
+ metrics:
154
+ - type: map
155
+ value: 62.586465273207196
156
+ - type: mrr
157
+ value: 74.42169019038825
158
+ - task:
159
+ type: STS
160
+ dataset:
161
+ type: mteb/biosses-sts
162
+ name: MTEB BIOSSES
163
+ config: default
164
+ split: test
165
+ revision: d3fb88f8f02e40887cd149695127462bbcf29b4a
166
+ metrics:
167
+ - type: cos_sim_pearson
168
+ value: 85.1891186537969
169
+ - type: cos_sim_spearman
170
+ value: 83.75492046087288
171
+ - type: euclidean_pearson
172
+ value: 84.11766204805357
173
+ - type: euclidean_spearman
174
+ value: 84.01456493126516
175
+ - type: manhattan_pearson
176
+ value: 84.2132950502772
177
+ - type: manhattan_spearman
178
+ value: 83.89227298813377
179
+ - task:
180
+ type: Classification
181
+ dataset:
182
+ type: mteb/banking77
183
+ name: MTEB Banking77Classification
184
+ config: default
185
+ split: test
186
+ revision: 0fd18e25b25c072e09e0d92ab615fda904d66300
187
+ metrics:
188
+ - type: accuracy
189
+ value: 85.74025974025975
190
+ - type: f1
191
+ value: 85.71493566466381
192
+ - task:
193
+ type: Clustering
194
+ dataset:
195
+ type: mteb/biorxiv-clustering-p2p
196
+ name: MTEB BiorxivClusteringP2P
197
+ config: default
198
+ split: test
199
+ revision: 65b79d1d13f80053f67aca9498d9402c2d9f1f40
200
+ metrics:
201
+ - type: v_measure
202
+ value: 38.467181385006434
203
+ - task:
204
+ type: Clustering
205
+ dataset:
206
+ type: mteb/biorxiv-clustering-s2s
207
+ name: MTEB BiorxivClusteringS2S
208
+ config: default
209
+ split: test
210
+ revision: 258694dd0231531bc1fd9de6ceb52a0853c6d908
211
+ metrics:
212
+ - type: v_measure
213
+ value: 34.719496037339056
214
+ - task:
215
+ type: Retrieval
216
+ dataset:
217
+ type: BeIR/cqadupstack
218
+ name: MTEB CQADupstackAndroidRetrieval
219
+ config: default
220
+ split: test
221
+ revision: None
222
+ metrics:
223
+ - type: map_at_1
224
+ value: 29.587000000000003
225
+ - type: map_at_10
226
+ value: 41.114
227
+ - type: map_at_100
228
+ value: 42.532
229
+ - type: map_at_1000
230
+ value: 42.661
231
+ - type: map_at_3
232
+ value: 37.483
233
+ - type: map_at_5
234
+ value: 39.652
235
+ - type: mrr_at_1
236
+ value: 36.338
237
+ - type: mrr_at_10
238
+ value: 46.763
239
+ - type: mrr_at_100
240
+ value: 47.393
241
+ - type: mrr_at_1000
242
+ value: 47.445
243
+ - type: mrr_at_3
244
+ value: 43.538
245
+ - type: mrr_at_5
246
+ value: 45.556000000000004
247
+ - type: ndcg_at_1
248
+ value: 36.338
249
+ - type: ndcg_at_10
250
+ value: 47.658
251
+ - type: ndcg_at_100
252
+ value: 52.824000000000005
253
+ - type: ndcg_at_1000
254
+ value: 54.913999999999994
255
+ - type: ndcg_at_3
256
+ value: 41.989
257
+ - type: ndcg_at_5
258
+ value: 44.944
259
+ - type: precision_at_1
260
+ value: 36.338
261
+ - type: precision_at_10
262
+ value: 9.156
263
+ - type: precision_at_100
264
+ value: 1.4789999999999999
265
+ - type: precision_at_1000
266
+ value: 0.196
267
+ - type: precision_at_3
268
+ value: 20.076
269
+ - type: precision_at_5
270
+ value: 14.85
271
+ - type: recall_at_1
272
+ value: 29.587000000000003
273
+ - type: recall_at_10
274
+ value: 60.746
275
+ - type: recall_at_100
276
+ value: 82.157
277
+ - type: recall_at_1000
278
+ value: 95.645
279
+ - type: recall_at_3
280
+ value: 44.821
281
+ - type: recall_at_5
282
+ value: 52.819
283
+ - task:
284
+ type: Retrieval
285
+ dataset:
286
+ type: BeIR/cqadupstack
287
+ name: MTEB CQADupstackEnglishRetrieval
288
+ config: default
289
+ split: test
290
+ revision: None
291
+ metrics:
292
+ - type: map_at_1
293
+ value: 30.239
294
+ - type: map_at_10
295
+ value: 39.989000000000004
296
+ - type: map_at_100
297
+ value: 41.196
298
+ - type: map_at_1000
299
+ value: 41.325
300
+ - type: map_at_3
301
+ value: 37.261
302
+ - type: map_at_5
303
+ value: 38.833
304
+ - type: mrr_at_1
305
+ value: 37.516
306
+ - type: mrr_at_10
307
+ value: 46.177
308
+ - type: mrr_at_100
309
+ value: 46.806
310
+ - type: mrr_at_1000
311
+ value: 46.849000000000004
312
+ - type: mrr_at_3
313
+ value: 44.002
314
+ - type: mrr_at_5
315
+ value: 45.34
316
+ - type: ndcg_at_1
317
+ value: 37.516
318
+ - type: ndcg_at_10
319
+ value: 45.586
320
+ - type: ndcg_at_100
321
+ value: 49.897000000000006
322
+ - type: ndcg_at_1000
323
+ value: 51.955
324
+ - type: ndcg_at_3
325
+ value: 41.684
326
+ - type: ndcg_at_5
327
+ value: 43.617
328
+ - type: precision_at_1
329
+ value: 37.516
330
+ - type: precision_at_10
331
+ value: 8.522
332
+ - type: precision_at_100
333
+ value: 1.374
334
+ - type: precision_at_1000
335
+ value: 0.184
336
+ - type: precision_at_3
337
+ value: 20.105999999999998
338
+ - type: precision_at_5
339
+ value: 14.152999999999999
340
+ - type: recall_at_1
341
+ value: 30.239
342
+ - type: recall_at_10
343
+ value: 55.03
344
+ - type: recall_at_100
345
+ value: 73.375
346
+ - type: recall_at_1000
347
+ value: 86.29599999999999
348
+ - type: recall_at_3
349
+ value: 43.269000000000005
350
+ - type: recall_at_5
351
+ value: 48.878
352
+ - task:
353
+ type: Retrieval
354
+ dataset:
355
+ type: BeIR/cqadupstack
356
+ name: MTEB CQADupstackGamingRetrieval
357
+ config: default
358
+ split: test
359
+ revision: None
360
+ metrics:
361
+ - type: map_at_1
362
+ value: 38.338
363
+ - type: map_at_10
364
+ value: 50.468999999999994
365
+ - type: map_at_100
366
+ value: 51.553000000000004
367
+ - type: map_at_1000
368
+ value: 51.608
369
+ - type: map_at_3
370
+ value: 47.107
371
+ - type: map_at_5
372
+ value: 49.101
373
+ - type: mrr_at_1
374
+ value: 44.201
375
+ - type: mrr_at_10
376
+ value: 54.057
377
+ - type: mrr_at_100
378
+ value: 54.764
379
+ - type: mrr_at_1000
380
+ value: 54.791000000000004
381
+ - type: mrr_at_3
382
+ value: 51.56699999999999
383
+ - type: mrr_at_5
384
+ value: 53.05
385
+ - type: ndcg_at_1
386
+ value: 44.201
387
+ - type: ndcg_at_10
388
+ value: 56.379000000000005
389
+ - type: ndcg_at_100
390
+ value: 60.645
391
+ - type: ndcg_at_1000
392
+ value: 61.73499999999999
393
+ - type: ndcg_at_3
394
+ value: 50.726000000000006
395
+ - type: ndcg_at_5
396
+ value: 53.58500000000001
397
+ - type: precision_at_1
398
+ value: 44.201
399
+ - type: precision_at_10
400
+ value: 9.141
401
+ - type: precision_at_100
402
+ value: 1.216
403
+ - type: precision_at_1000
404
+ value: 0.135
405
+ - type: precision_at_3
406
+ value: 22.654
407
+ - type: precision_at_5
408
+ value: 15.723999999999998
409
+ - type: recall_at_1
410
+ value: 38.338
411
+ - type: recall_at_10
412
+ value: 70.30499999999999
413
+ - type: recall_at_100
414
+ value: 88.77199999999999
415
+ - type: recall_at_1000
416
+ value: 96.49799999999999
417
+ - type: recall_at_3
418
+ value: 55.218
419
+ - type: recall_at_5
420
+ value: 62.104000000000006
421
+ - task:
422
+ type: Retrieval
423
+ dataset:
424
+ type: BeIR/cqadupstack
425
+ name: MTEB CQADupstackGisRetrieval
426
+ config: default
427
+ split: test
428
+ revision: None
429
+ metrics:
430
+ - type: map_at_1
431
+ value: 25.682
432
+ - type: map_at_10
433
+ value: 33.498
434
+ - type: map_at_100
435
+ value: 34.461000000000006
436
+ - type: map_at_1000
437
+ value: 34.544000000000004
438
+ - type: map_at_3
439
+ value: 30.503999999999998
440
+ - type: map_at_5
441
+ value: 32.216
442
+ - type: mrr_at_1
443
+ value: 27.683999999999997
444
+ - type: mrr_at_10
445
+ value: 35.467999999999996
446
+ - type: mrr_at_100
447
+ value: 36.32
448
+ - type: mrr_at_1000
449
+ value: 36.386
450
+ - type: mrr_at_3
451
+ value: 32.618
452
+ - type: mrr_at_5
453
+ value: 34.262
454
+ - type: ndcg_at_1
455
+ value: 27.683999999999997
456
+ - type: ndcg_at_10
457
+ value: 38.378
458
+ - type: ndcg_at_100
459
+ value: 43.288
460
+ - type: ndcg_at_1000
461
+ value: 45.413
462
+ - type: ndcg_at_3
463
+ value: 32.586
464
+ - type: ndcg_at_5
465
+ value: 35.499
466
+ - type: precision_at_1
467
+ value: 27.683999999999997
468
+ - type: precision_at_10
469
+ value: 5.864
470
+ - type: precision_at_100
471
+ value: 0.882
472
+ - type: precision_at_1000
473
+ value: 0.11
474
+ - type: precision_at_3
475
+ value: 13.446
476
+ - type: precision_at_5
477
+ value: 9.718
478
+ - type: recall_at_1
479
+ value: 25.682
480
+ - type: recall_at_10
481
+ value: 51.712
482
+ - type: recall_at_100
483
+ value: 74.446
484
+ - type: recall_at_1000
485
+ value: 90.472
486
+ - type: recall_at_3
487
+ value: 36.236000000000004
488
+ - type: recall_at_5
489
+ value: 43.234
490
+ - task:
491
+ type: Retrieval
492
+ dataset:
493
+ type: BeIR/cqadupstack
494
+ name: MTEB CQADupstackMathematicaRetrieval
495
+ config: default
496
+ split: test
497
+ revision: None
498
+ metrics:
499
+ - type: map_at_1
500
+ value: 16.073999999999998
501
+ - type: map_at_10
502
+ value: 24.352999999999998
503
+ - type: map_at_100
504
+ value: 25.438
505
+ - type: map_at_1000
506
+ value: 25.545
507
+ - type: map_at_3
508
+ value: 21.614
509
+ - type: map_at_5
510
+ value: 23.104
511
+ - type: mrr_at_1
512
+ value: 19.776
513
+ - type: mrr_at_10
514
+ value: 28.837000000000003
515
+ - type: mrr_at_100
516
+ value: 29.755
517
+ - type: mrr_at_1000
518
+ value: 29.817
519
+ - type: mrr_at_3
520
+ value: 26.201999999999998
521
+ - type: mrr_at_5
522
+ value: 27.714
523
+ - type: ndcg_at_1
524
+ value: 19.776
525
+ - type: ndcg_at_10
526
+ value: 29.701
527
+ - type: ndcg_at_100
528
+ value: 35.307
529
+ - type: ndcg_at_1000
530
+ value: 37.942
531
+ - type: ndcg_at_3
532
+ value: 24.764
533
+ - type: ndcg_at_5
534
+ value: 27.025
535
+ - type: precision_at_1
536
+ value: 19.776
537
+ - type: precision_at_10
538
+ value: 5.659
539
+ - type: precision_at_100
540
+ value: 0.971
541
+ - type: precision_at_1000
542
+ value: 0.133
543
+ - type: precision_at_3
544
+ value: 12.065
545
+ - type: precision_at_5
546
+ value: 8.905000000000001
547
+ - type: recall_at_1
548
+ value: 16.073999999999998
549
+ - type: recall_at_10
550
+ value: 41.647
551
+ - type: recall_at_100
552
+ value: 66.884
553
+ - type: recall_at_1000
554
+ value: 85.91499999999999
555
+ - type: recall_at_3
556
+ value: 27.916
557
+ - type: recall_at_5
558
+ value: 33.729
559
+ - task:
560
+ type: Retrieval
561
+ dataset:
562
+ type: BeIR/cqadupstack
563
+ name: MTEB CQADupstackPhysicsRetrieval
564
+ config: default
565
+ split: test
566
+ revision: None
567
+ metrics:
568
+ - type: map_at_1
569
+ value: 28.444999999999997
570
+ - type: map_at_10
571
+ value: 38.218999999999994
572
+ - type: map_at_100
573
+ value: 39.595
574
+ - type: map_at_1000
575
+ value: 39.709
576
+ - type: map_at_3
577
+ value: 35.586
578
+ - type: map_at_5
579
+ value: 36.895
580
+ - type: mrr_at_1
581
+ value: 34.841
582
+ - type: mrr_at_10
583
+ value: 44.106
584
+ - type: mrr_at_100
585
+ value: 44.98
586
+ - type: mrr_at_1000
587
+ value: 45.03
588
+ - type: mrr_at_3
589
+ value: 41.979
590
+ - type: mrr_at_5
591
+ value: 43.047999999999995
592
+ - type: ndcg_at_1
593
+ value: 34.841
594
+ - type: ndcg_at_10
595
+ value: 43.922
596
+ - type: ndcg_at_100
597
+ value: 49.504999999999995
598
+ - type: ndcg_at_1000
599
+ value: 51.675000000000004
600
+ - type: ndcg_at_3
601
+ value: 39.858
602
+ - type: ndcg_at_5
603
+ value: 41.408
604
+ - type: precision_at_1
605
+ value: 34.841
606
+ - type: precision_at_10
607
+ value: 7.872999999999999
608
+ - type: precision_at_100
609
+ value: 1.2449999999999999
610
+ - type: precision_at_1000
611
+ value: 0.161
612
+ - type: precision_at_3
613
+ value: 18.993
614
+ - type: precision_at_5
615
+ value: 13.032
616
+ - type: recall_at_1
617
+ value: 28.444999999999997
618
+ - type: recall_at_10
619
+ value: 54.984
620
+ - type: recall_at_100
621
+ value: 78.342
622
+ - type: recall_at_1000
623
+ value: 92.77
624
+ - type: recall_at_3
625
+ value: 42.842999999999996
626
+ - type: recall_at_5
627
+ value: 47.247
628
+ - task:
629
+ type: Retrieval
630
+ dataset:
631
+ type: BeIR/cqadupstack
632
+ name: MTEB CQADupstackProgrammersRetrieval
633
+ config: default
634
+ split: test
635
+ revision: None
636
+ metrics:
637
+ - type: map_at_1
638
+ value: 23.072
639
+ - type: map_at_10
640
+ value: 32.354
641
+ - type: map_at_100
642
+ value: 33.800000000000004
643
+ - type: map_at_1000
644
+ value: 33.908
645
+ - type: map_at_3
646
+ value: 29.232000000000003
647
+ - type: map_at_5
648
+ value: 31.049
649
+ - type: mrr_at_1
650
+ value: 29.110000000000003
651
+ - type: mrr_at_10
652
+ value: 38.03
653
+ - type: mrr_at_100
654
+ value: 39.032
655
+ - type: mrr_at_1000
656
+ value: 39.086999999999996
657
+ - type: mrr_at_3
658
+ value: 35.407
659
+ - type: mrr_at_5
660
+ value: 36.76
661
+ - type: ndcg_at_1
662
+ value: 29.110000000000003
663
+ - type: ndcg_at_10
664
+ value: 38.231
665
+ - type: ndcg_at_100
666
+ value: 44.425
667
+ - type: ndcg_at_1000
668
+ value: 46.771
669
+ - type: ndcg_at_3
670
+ value: 33.095
671
+ - type: ndcg_at_5
672
+ value: 35.459
673
+ - type: precision_at_1
674
+ value: 29.110000000000003
675
+ - type: precision_at_10
676
+ value: 7.215000000000001
677
+ - type: precision_at_100
678
+ value: 1.2109999999999999
679
+ - type: precision_at_1000
680
+ value: 0.157
681
+ - type: precision_at_3
682
+ value: 16.058
683
+ - type: precision_at_5
684
+ value: 11.644
685
+ - type: recall_at_1
686
+ value: 23.072
687
+ - type: recall_at_10
688
+ value: 50.285999999999994
689
+ - type: recall_at_100
690
+ value: 76.596
691
+ - type: recall_at_1000
692
+ value: 92.861
693
+ - type: recall_at_3
694
+ value: 35.702
695
+ - type: recall_at_5
696
+ value: 42.152
697
+ - task:
698
+ type: Retrieval
699
+ dataset:
700
+ type: BeIR/cqadupstack
701
+ name: MTEB CQADupstackRetrieval
702
+ config: default
703
+ split: test
704
+ revision: None
705
+ metrics:
706
+ - type: map_at_1
707
+ value: 24.937916666666666
708
+ - type: map_at_10
709
+ value: 33.755250000000004
710
+ - type: map_at_100
711
+ value: 34.955999999999996
712
+ - type: map_at_1000
713
+ value: 35.070499999999996
714
+ - type: map_at_3
715
+ value: 30.98708333333333
716
+ - type: map_at_5
717
+ value: 32.51491666666666
718
+ - type: mrr_at_1
719
+ value: 29.48708333333333
720
+ - type: mrr_at_10
721
+ value: 37.92183333333334
722
+ - type: mrr_at_100
723
+ value: 38.76583333333333
724
+ - type: mrr_at_1000
725
+ value: 38.82466666666667
726
+ - type: mrr_at_3
727
+ value: 35.45125
728
+ - type: mrr_at_5
729
+ value: 36.827000000000005
730
+ - type: ndcg_at_1
731
+ value: 29.48708333333333
732
+ - type: ndcg_at_10
733
+ value: 39.05225
734
+ - type: ndcg_at_100
735
+ value: 44.25983333333334
736
+ - type: ndcg_at_1000
737
+ value: 46.568333333333335
738
+ - type: ndcg_at_3
739
+ value: 34.271583333333325
740
+ - type: ndcg_at_5
741
+ value: 36.483916666666666
742
+ - type: precision_at_1
743
+ value: 29.48708333333333
744
+ - type: precision_at_10
745
+ value: 6.865749999999999
746
+ - type: precision_at_100
747
+ value: 1.1195833333333332
748
+ - type: precision_at_1000
749
+ value: 0.15058333333333335
750
+ - type: precision_at_3
751
+ value: 15.742083333333333
752
+ - type: precision_at_5
753
+ value: 11.221916666666667
754
+ - type: recall_at_1
755
+ value: 24.937916666666666
756
+ - type: recall_at_10
757
+ value: 50.650416666666665
758
+ - type: recall_at_100
759
+ value: 73.55383333333334
760
+ - type: recall_at_1000
761
+ value: 89.61691666666667
762
+ - type: recall_at_3
763
+ value: 37.27808333333334
764
+ - type: recall_at_5
765
+ value: 42.99475
766
+ - task:
767
+ type: Retrieval
768
+ dataset:
769
+ type: BeIR/cqadupstack
770
+ name: MTEB CQADupstackStatsRetrieval
771
+ config: default
772
+ split: test
773
+ revision: None
774
+ metrics:
775
+ - type: map_at_1
776
+ value: 23.947
777
+ - type: map_at_10
778
+ value: 30.575000000000003
779
+ - type: map_at_100
780
+ value: 31.465
781
+ - type: map_at_1000
782
+ value: 31.558000000000003
783
+ - type: map_at_3
784
+ value: 28.814
785
+ - type: map_at_5
786
+ value: 29.738999999999997
787
+ - type: mrr_at_1
788
+ value: 26.994
789
+ - type: mrr_at_10
790
+ value: 33.415
791
+ - type: mrr_at_100
792
+ value: 34.18
793
+ - type: mrr_at_1000
794
+ value: 34.245
795
+ - type: mrr_at_3
796
+ value: 31.621
797
+ - type: mrr_at_5
798
+ value: 32.549
799
+ - type: ndcg_at_1
800
+ value: 26.994
801
+ - type: ndcg_at_10
802
+ value: 34.482
803
+ - type: ndcg_at_100
804
+ value: 38.915
805
+ - type: ndcg_at_1000
806
+ value: 41.355
807
+ - type: ndcg_at_3
808
+ value: 31.139
809
+ - type: ndcg_at_5
810
+ value: 32.589
811
+ - type: precision_at_1
812
+ value: 26.994
813
+ - type: precision_at_10
814
+ value: 5.322
815
+ - type: precision_at_100
816
+ value: 0.8160000000000001
817
+ - type: precision_at_1000
818
+ value: 0.11100000000000002
819
+ - type: precision_at_3
820
+ value: 13.344000000000001
821
+ - type: precision_at_5
822
+ value: 8.988
823
+ - type: recall_at_1
824
+ value: 23.947
825
+ - type: recall_at_10
826
+ value: 43.647999999999996
827
+ - type: recall_at_100
828
+ value: 63.851
829
+ - type: recall_at_1000
830
+ value: 82.0
831
+ - type: recall_at_3
832
+ value: 34.288000000000004
833
+ - type: recall_at_5
834
+ value: 38.117000000000004
835
+ - task:
836
+ type: Retrieval
837
+ dataset:
838
+ type: BeIR/cqadupstack
839
+ name: MTEB CQADupstackTexRetrieval
840
+ config: default
841
+ split: test
842
+ revision: None
843
+ metrics:
844
+ - type: map_at_1
845
+ value: 16.197
846
+ - type: map_at_10
847
+ value: 22.968
848
+ - type: map_at_100
849
+ value: 24.095
850
+ - type: map_at_1000
851
+ value: 24.217
852
+ - type: map_at_3
853
+ value: 20.771
854
+ - type: map_at_5
855
+ value: 21.995
856
+ - type: mrr_at_1
857
+ value: 19.511
858
+ - type: mrr_at_10
859
+ value: 26.55
860
+ - type: mrr_at_100
861
+ value: 27.500999999999998
862
+ - type: mrr_at_1000
863
+ value: 27.578999999999997
864
+ - type: mrr_at_3
865
+ value: 24.421
866
+ - type: mrr_at_5
867
+ value: 25.604
868
+ - type: ndcg_at_1
869
+ value: 19.511
870
+ - type: ndcg_at_10
871
+ value: 27.386
872
+ - type: ndcg_at_100
873
+ value: 32.828
874
+ - type: ndcg_at_1000
875
+ value: 35.739
876
+ - type: ndcg_at_3
877
+ value: 23.405
878
+ - type: ndcg_at_5
879
+ value: 25.255
880
+ - type: precision_at_1
881
+ value: 19.511
882
+ - type: precision_at_10
883
+ value: 5.017
884
+ - type: precision_at_100
885
+ value: 0.91
886
+ - type: precision_at_1000
887
+ value: 0.133
888
+ - type: precision_at_3
889
+ value: 11.023
890
+ - type: precision_at_5
891
+ value: 8.025
892
+ - type: recall_at_1
893
+ value: 16.197
894
+ - type: recall_at_10
895
+ value: 37.09
896
+ - type: recall_at_100
897
+ value: 61.778
898
+ - type: recall_at_1000
899
+ value: 82.56599999999999
900
+ - type: recall_at_3
901
+ value: 26.034000000000002
902
+ - type: recall_at_5
903
+ value: 30.762
904
+ - task:
905
+ type: Retrieval
906
+ dataset:
907
+ type: BeIR/cqadupstack
908
+ name: MTEB CQADupstackUnixRetrieval
909
+ config: default
910
+ split: test
911
+ revision: None
912
+ metrics:
913
+ - type: map_at_1
914
+ value: 25.41
915
+ - type: map_at_10
916
+ value: 33.655
917
+ - type: map_at_100
918
+ value: 34.892
919
+ - type: map_at_1000
920
+ value: 34.995
921
+ - type: map_at_3
922
+ value: 30.94
923
+ - type: map_at_5
924
+ value: 32.303
925
+ - type: mrr_at_1
926
+ value: 29.477999999999998
927
+ - type: mrr_at_10
928
+ value: 37.443
929
+ - type: mrr_at_100
930
+ value: 38.383
931
+ - type: mrr_at_1000
932
+ value: 38.440000000000005
933
+ - type: mrr_at_3
934
+ value: 34.949999999999996
935
+ - type: mrr_at_5
936
+ value: 36.228
937
+ - type: ndcg_at_1
938
+ value: 29.477999999999998
939
+ - type: ndcg_at_10
940
+ value: 38.769
941
+ - type: ndcg_at_100
942
+ value: 44.245000000000005
943
+ - type: ndcg_at_1000
944
+ value: 46.593
945
+ - type: ndcg_at_3
946
+ value: 33.623
947
+ - type: ndcg_at_5
948
+ value: 35.766
949
+ - type: precision_at_1
950
+ value: 29.477999999999998
951
+ - type: precision_at_10
952
+ value: 6.455
953
+ - type: precision_at_100
954
+ value: 1.032
955
+ - type: precision_at_1000
956
+ value: 0.135
957
+ - type: precision_at_3
958
+ value: 14.893999999999998
959
+ - type: precision_at_5
960
+ value: 10.485
961
+ - type: recall_at_1
962
+ value: 25.41
963
+ - type: recall_at_10
964
+ value: 50.669
965
+ - type: recall_at_100
966
+ value: 74.084
967
+ - type: recall_at_1000
968
+ value: 90.435
969
+ - type: recall_at_3
970
+ value: 36.679
971
+ - type: recall_at_5
972
+ value: 41.94
973
+ - task:
974
+ type: Retrieval
975
+ dataset:
976
+ type: BeIR/cqadupstack
977
+ name: MTEB CQADupstackWebmastersRetrieval
978
+ config: default
979
+ split: test
980
+ revision: None
981
+ metrics:
982
+ - type: map_at_1
983
+ value: 23.339
984
+ - type: map_at_10
985
+ value: 31.852000000000004
986
+ - type: map_at_100
987
+ value: 33.411
988
+ - type: map_at_1000
989
+ value: 33.62
990
+ - type: map_at_3
991
+ value: 28.929
992
+ - type: map_at_5
993
+ value: 30.542
994
+ - type: mrr_at_1
995
+ value: 28.063
996
+ - type: mrr_at_10
997
+ value: 36.301
998
+ - type: mrr_at_100
999
+ value: 37.288
1000
+ - type: mrr_at_1000
1001
+ value: 37.349
1002
+ - type: mrr_at_3
1003
+ value: 33.663
1004
+ - type: mrr_at_5
1005
+ value: 35.165
1006
+ - type: ndcg_at_1
1007
+ value: 28.063
1008
+ - type: ndcg_at_10
1009
+ value: 37.462
1010
+ - type: ndcg_at_100
1011
+ value: 43.620999999999995
1012
+ - type: ndcg_at_1000
1013
+ value: 46.211
1014
+ - type: ndcg_at_3
1015
+ value: 32.68
1016
+ - type: ndcg_at_5
1017
+ value: 34.981
1018
+ - type: precision_at_1
1019
+ value: 28.063
1020
+ - type: precision_at_10
1021
+ value: 7.1739999999999995
1022
+ - type: precision_at_100
1023
+ value: 1.486
1024
+ - type: precision_at_1000
1025
+ value: 0.23500000000000001
1026
+ - type: precision_at_3
1027
+ value: 15.217
1028
+ - type: precision_at_5
1029
+ value: 11.265
1030
+ - type: recall_at_1
1031
+ value: 23.339
1032
+ - type: recall_at_10
1033
+ value: 48.376999999999995
1034
+ - type: recall_at_100
1035
+ value: 76.053
1036
+ - type: recall_at_1000
1037
+ value: 92.455
1038
+ - type: recall_at_3
1039
+ value: 34.735
1040
+ - type: recall_at_5
1041
+ value: 40.71
1042
+ - task:
1043
+ type: Retrieval
1044
+ dataset:
1045
+ type: BeIR/cqadupstack
1046
+ name: MTEB CQADupstackWordpressRetrieval
1047
+ config: default
1048
+ split: test
1049
+ revision: None
1050
+ metrics:
1051
+ - type: map_at_1
1052
+ value: 18.925
1053
+ - type: map_at_10
1054
+ value: 26.017000000000003
1055
+ - type: map_at_100
1056
+ value: 27.034000000000002
1057
+ - type: map_at_1000
1058
+ value: 27.156000000000002
1059
+ - type: map_at_3
1060
+ value: 23.604
1061
+ - type: map_at_5
1062
+ value: 24.75
1063
+ - type: mrr_at_1
1064
+ value: 20.333000000000002
1065
+ - type: mrr_at_10
1066
+ value: 27.915
1067
+ - type: mrr_at_100
1068
+ value: 28.788000000000004
1069
+ - type: mrr_at_1000
1070
+ value: 28.877999999999997
1071
+ - type: mrr_at_3
1072
+ value: 25.446999999999996
1073
+ - type: mrr_at_5
1074
+ value: 26.648
1075
+ - type: ndcg_at_1
1076
+ value: 20.333000000000002
1077
+ - type: ndcg_at_10
1078
+ value: 30.673000000000002
1079
+ - type: ndcg_at_100
1080
+ value: 35.618
1081
+ - type: ndcg_at_1000
1082
+ value: 38.517
1083
+ - type: ndcg_at_3
1084
+ value: 25.71
1085
+ - type: ndcg_at_5
1086
+ value: 27.679
1087
+ - type: precision_at_1
1088
+ value: 20.333000000000002
1089
+ - type: precision_at_10
1090
+ value: 4.9910000000000005
1091
+ - type: precision_at_100
1092
+ value: 0.8130000000000001
1093
+ - type: precision_at_1000
1094
+ value: 0.117
1095
+ - type: precision_at_3
1096
+ value: 11.029
1097
+ - type: precision_at_5
1098
+ value: 7.8740000000000006
1099
+ - type: recall_at_1
1100
+ value: 18.925
1101
+ - type: recall_at_10
1102
+ value: 43.311
1103
+ - type: recall_at_100
1104
+ value: 66.308
1105
+ - type: recall_at_1000
1106
+ value: 87.49
1107
+ - type: recall_at_3
1108
+ value: 29.596
1109
+ - type: recall_at_5
1110
+ value: 34.245
1111
+ - task:
1112
+ type: Retrieval
1113
+ dataset:
1114
+ type: climate-fever
1115
+ name: MTEB ClimateFEVER
1116
+ config: default
1117
+ split: test
1118
+ revision: None
1119
+ metrics:
1120
+ - type: map_at_1
1121
+ value: 13.714
1122
+ - type: map_at_10
1123
+ value: 23.194
1124
+ - type: map_at_100
1125
+ value: 24.976000000000003
1126
+ - type: map_at_1000
1127
+ value: 25.166
1128
+ - type: map_at_3
1129
+ value: 19.709
1130
+ - type: map_at_5
1131
+ value: 21.523999999999997
1132
+ - type: mrr_at_1
1133
+ value: 30.619000000000003
1134
+ - type: mrr_at_10
1135
+ value: 42.563
1136
+ - type: mrr_at_100
1137
+ value: 43.386
1138
+ - type: mrr_at_1000
1139
+ value: 43.423
1140
+ - type: mrr_at_3
1141
+ value: 39.555
1142
+ - type: mrr_at_5
1143
+ value: 41.268
1144
+ - type: ndcg_at_1
1145
+ value: 30.619000000000003
1146
+ - type: ndcg_at_10
1147
+ value: 31.836
1148
+ - type: ndcg_at_100
1149
+ value: 38.652
1150
+ - type: ndcg_at_1000
1151
+ value: 42.088
1152
+ - type: ndcg_at_3
1153
+ value: 26.733
1154
+ - type: ndcg_at_5
1155
+ value: 28.435
1156
+ - type: precision_at_1
1157
+ value: 30.619000000000003
1158
+ - type: precision_at_10
1159
+ value: 9.751999999999999
1160
+ - type: precision_at_100
1161
+ value: 1.71
1162
+ - type: precision_at_1000
1163
+ value: 0.23500000000000001
1164
+ - type: precision_at_3
1165
+ value: 19.935
1166
+ - type: precision_at_5
1167
+ value: 14.984
1168
+ - type: recall_at_1
1169
+ value: 13.714
1170
+ - type: recall_at_10
1171
+ value: 37.26
1172
+ - type: recall_at_100
1173
+ value: 60.546
1174
+ - type: recall_at_1000
1175
+ value: 79.899
1176
+ - type: recall_at_3
1177
+ value: 24.325
1178
+ - type: recall_at_5
1179
+ value: 29.725
1180
+ - task:
1181
+ type: Retrieval
1182
+ dataset:
1183
+ type: dbpedia-entity
1184
+ name: MTEB DBPedia
1185
+ config: default
1186
+ split: test
1187
+ revision: None
1188
+ metrics:
1189
+ - type: map_at_1
1190
+ value: 8.462
1191
+ - type: map_at_10
1192
+ value: 18.637
1193
+ - type: map_at_100
1194
+ value: 26.131999999999998
1195
+ - type: map_at_1000
1196
+ value: 27.607
1197
+ - type: map_at_3
1198
+ value: 13.333
1199
+ - type: map_at_5
1200
+ value: 15.654000000000002
1201
+ - type: mrr_at_1
1202
+ value: 66.25
1203
+ - type: mrr_at_10
1204
+ value: 74.32600000000001
1205
+ - type: mrr_at_100
1206
+ value: 74.60900000000001
1207
+ - type: mrr_at_1000
1208
+ value: 74.62
1209
+ - type: mrr_at_3
1210
+ value: 72.667
1211
+ - type: mrr_at_5
1212
+ value: 73.817
1213
+ - type: ndcg_at_1
1214
+ value: 53.87499999999999
1215
+ - type: ndcg_at_10
1216
+ value: 40.028999999999996
1217
+ - type: ndcg_at_100
1218
+ value: 44.199
1219
+ - type: ndcg_at_1000
1220
+ value: 51.629999999999995
1221
+ - type: ndcg_at_3
1222
+ value: 44.113
1223
+ - type: ndcg_at_5
1224
+ value: 41.731
1225
+ - type: precision_at_1
1226
+ value: 66.25
1227
+ - type: precision_at_10
1228
+ value: 31.900000000000002
1229
+ - type: precision_at_100
1230
+ value: 10.043000000000001
1231
+ - type: precision_at_1000
1232
+ value: 1.926
1233
+ - type: precision_at_3
1234
+ value: 47.417
1235
+ - type: precision_at_5
1236
+ value: 40.65
1237
+ - type: recall_at_1
1238
+ value: 8.462
1239
+ - type: recall_at_10
1240
+ value: 24.293
1241
+ - type: recall_at_100
1242
+ value: 50.146
1243
+ - type: recall_at_1000
1244
+ value: 74.034
1245
+ - type: recall_at_3
1246
+ value: 14.967
1247
+ - type: recall_at_5
1248
+ value: 18.682000000000002
1249
+ - task:
1250
+ type: Classification
1251
+ dataset:
1252
+ type: mteb/emotion
1253
+ name: MTEB EmotionClassification
1254
+ config: default
1255
+ split: test
1256
+ revision: 4f58c6b202a23cf9a4da393831edf4f9183cad37
1257
+ metrics:
1258
+ - type: accuracy
1259
+ value: 47.84499999999999
1260
+ - type: f1
1261
+ value: 42.48106691979349
1262
+ - task:
1263
+ type: Retrieval
1264
+ dataset:
1265
+ type: fever
1266
+ name: MTEB FEVER
1267
+ config: default
1268
+ split: test
1269
+ revision: None
1270
+ metrics:
1271
+ - type: map_at_1
1272
+ value: 74.034
1273
+ - type: map_at_10
1274
+ value: 82.76
1275
+ - type: map_at_100
1276
+ value: 82.968
1277
+ - type: map_at_1000
1278
+ value: 82.98299999999999
1279
+ - type: map_at_3
1280
+ value: 81.768
1281
+ - type: map_at_5
1282
+ value: 82.418
1283
+ - type: mrr_at_1
1284
+ value: 80.048
1285
+ - type: mrr_at_10
1286
+ value: 87.64999999999999
1287
+ - type: mrr_at_100
1288
+ value: 87.712
1289
+ - type: mrr_at_1000
1290
+ value: 87.713
1291
+ - type: mrr_at_3
1292
+ value: 87.01100000000001
1293
+ - type: mrr_at_5
1294
+ value: 87.466
1295
+ - type: ndcg_at_1
1296
+ value: 80.048
1297
+ - type: ndcg_at_10
1298
+ value: 86.643
1299
+ - type: ndcg_at_100
1300
+ value: 87.361
1301
+ - type: ndcg_at_1000
1302
+ value: 87.606
1303
+ - type: ndcg_at_3
1304
+ value: 85.137
1305
+ - type: ndcg_at_5
1306
+ value: 86.016
1307
+ - type: precision_at_1
1308
+ value: 80.048
1309
+ - type: precision_at_10
1310
+ value: 10.372
1311
+ - type: precision_at_100
1312
+ value: 1.093
1313
+ - type: precision_at_1000
1314
+ value: 0.11299999999999999
1315
+ - type: precision_at_3
1316
+ value: 32.638
1317
+ - type: precision_at_5
1318
+ value: 20.177
1319
+ - type: recall_at_1
1320
+ value: 74.034
1321
+ - type: recall_at_10
1322
+ value: 93.769
1323
+ - type: recall_at_100
1324
+ value: 96.569
1325
+ - type: recall_at_1000
1326
+ value: 98.039
1327
+ - type: recall_at_3
1328
+ value: 89.581
1329
+ - type: recall_at_5
1330
+ value: 91.906
1331
+ - task:
1332
+ type: Retrieval
1333
+ dataset:
1334
+ type: fiqa
1335
+ name: MTEB FiQA2018
1336
+ config: default
1337
+ split: test
1338
+ revision: None
1339
+ metrics:
1340
+ - type: map_at_1
1341
+ value: 20.5
1342
+ - type: map_at_10
1343
+ value: 32.857
1344
+ - type: map_at_100
1345
+ value: 34.589
1346
+ - type: map_at_1000
1347
+ value: 34.778
1348
+ - type: map_at_3
1349
+ value: 29.160999999999998
1350
+ - type: map_at_5
1351
+ value: 31.033
1352
+ - type: mrr_at_1
1353
+ value: 40.123
1354
+ - type: mrr_at_10
1355
+ value: 48.776
1356
+ - type: mrr_at_100
1357
+ value: 49.495
1358
+ - type: mrr_at_1000
1359
+ value: 49.539
1360
+ - type: mrr_at_3
1361
+ value: 46.605000000000004
1362
+ - type: mrr_at_5
1363
+ value: 47.654
1364
+ - type: ndcg_at_1
1365
+ value: 40.123
1366
+ - type: ndcg_at_10
1367
+ value: 40.343
1368
+ - type: ndcg_at_100
1369
+ value: 46.56
1370
+ - type: ndcg_at_1000
1371
+ value: 49.777
1372
+ - type: ndcg_at_3
1373
+ value: 37.322
1374
+ - type: ndcg_at_5
1375
+ value: 37.791000000000004
1376
+ - type: precision_at_1
1377
+ value: 40.123
1378
+ - type: precision_at_10
1379
+ value: 11.08
1380
+ - type: precision_at_100
1381
+ value: 1.752
1382
+ - type: precision_at_1000
1383
+ value: 0.232
1384
+ - type: precision_at_3
1385
+ value: 24.897
1386
+ - type: precision_at_5
1387
+ value: 17.809
1388
+ - type: recall_at_1
1389
+ value: 20.5
1390
+ - type: recall_at_10
1391
+ value: 46.388
1392
+ - type: recall_at_100
1393
+ value: 69.552
1394
+ - type: recall_at_1000
1395
+ value: 89.011
1396
+ - type: recall_at_3
1397
+ value: 33.617999999999995
1398
+ - type: recall_at_5
1399
+ value: 38.211
1400
+ - task:
1401
+ type: Retrieval
1402
+ dataset:
1403
+ type: hotpotqa
1404
+ name: MTEB HotpotQA
1405
+ config: default
1406
+ split: test
1407
+ revision: None
1408
+ metrics:
1409
+ - type: map_at_1
1410
+ value: 39.135999999999996
1411
+ - type: map_at_10
1412
+ value: 61.673
1413
+ - type: map_at_100
1414
+ value: 62.562
1415
+ - type: map_at_1000
1416
+ value: 62.62
1417
+ - type: map_at_3
1418
+ value: 58.467999999999996
1419
+ - type: map_at_5
1420
+ value: 60.463
1421
+ - type: mrr_at_1
1422
+ value: 78.271
1423
+ - type: mrr_at_10
1424
+ value: 84.119
1425
+ - type: mrr_at_100
1426
+ value: 84.29299999999999
1427
+ - type: mrr_at_1000
1428
+ value: 84.299
1429
+ - type: mrr_at_3
1430
+ value: 83.18900000000001
1431
+ - type: mrr_at_5
1432
+ value: 83.786
1433
+ - type: ndcg_at_1
1434
+ value: 78.271
1435
+ - type: ndcg_at_10
1436
+ value: 69.935
1437
+ - type: ndcg_at_100
1438
+ value: 73.01299999999999
1439
+ - type: ndcg_at_1000
1440
+ value: 74.126
1441
+ - type: ndcg_at_3
1442
+ value: 65.388
1443
+ - type: ndcg_at_5
1444
+ value: 67.906
1445
+ - type: precision_at_1
1446
+ value: 78.271
1447
+ - type: precision_at_10
1448
+ value: 14.562
1449
+ - type: precision_at_100
1450
+ value: 1.6969999999999998
1451
+ - type: precision_at_1000
1452
+ value: 0.184
1453
+ - type: precision_at_3
1454
+ value: 41.841
1455
+ - type: precision_at_5
1456
+ value: 27.087
1457
+ - type: recall_at_1
1458
+ value: 39.135999999999996
1459
+ - type: recall_at_10
1460
+ value: 72.809
1461
+ - type: recall_at_100
1462
+ value: 84.86200000000001
1463
+ - type: recall_at_1000
1464
+ value: 92.208
1465
+ - type: recall_at_3
1466
+ value: 62.76199999999999
1467
+ - type: recall_at_5
1468
+ value: 67.718
1469
+ - task:
1470
+ type: Classification
1471
+ dataset:
1472
+ type: mteb/imdb
1473
+ name: MTEB ImdbClassification
1474
+ config: default
1475
+ split: test
1476
+ revision: 3d86128a09e091d6018b6d26cad27f2739fc2db7
1477
+ metrics:
1478
+ - type: accuracy
1479
+ value: 90.60600000000001
1480
+ - type: ap
1481
+ value: 86.6579587804335
1482
+ - type: f1
1483
+ value: 90.5938853929307
1484
+ - task:
1485
+ type: Retrieval
1486
+ dataset:
1487
+ type: msmarco
1488
+ name: MTEB MSMARCO
1489
+ config: default
1490
+ split: dev
1491
+ revision: None
1492
+ metrics:
1493
+ - type: map_at_1
1494
+ value: 21.852
1495
+ - type: map_at_10
1496
+ value: 33.982
1497
+ - type: map_at_100
1498
+ value: 35.116
1499
+ - type: map_at_1000
1500
+ value: 35.167
1501
+ - type: map_at_3
1502
+ value: 30.134
1503
+ - type: map_at_5
1504
+ value: 32.340999999999994
1505
+ - type: mrr_at_1
1506
+ value: 22.479
1507
+ - type: mrr_at_10
1508
+ value: 34.594
1509
+ - type: mrr_at_100
1510
+ value: 35.672
1511
+ - type: mrr_at_1000
1512
+ value: 35.716
1513
+ - type: mrr_at_3
1514
+ value: 30.84
1515
+ - type: mrr_at_5
1516
+ value: 32.998
1517
+ - type: ndcg_at_1
1518
+ value: 22.493
1519
+ - type: ndcg_at_10
1520
+ value: 40.833000000000006
1521
+ - type: ndcg_at_100
1522
+ value: 46.357
1523
+ - type: ndcg_at_1000
1524
+ value: 47.637
1525
+ - type: ndcg_at_3
1526
+ value: 32.995999999999995
1527
+ - type: ndcg_at_5
1528
+ value: 36.919000000000004
1529
+ - type: precision_at_1
1530
+ value: 22.493
1531
+ - type: precision_at_10
1532
+ value: 6.465999999999999
1533
+ - type: precision_at_100
1534
+ value: 0.9249999999999999
1535
+ - type: precision_at_1000
1536
+ value: 0.104
1537
+ - type: precision_at_3
1538
+ value: 14.030999999999999
1539
+ - type: precision_at_5
1540
+ value: 10.413
1541
+ - type: recall_at_1
1542
+ value: 21.852
1543
+ - type: recall_at_10
1544
+ value: 61.934999999999995
1545
+ - type: recall_at_100
1546
+ value: 87.611
1547
+ - type: recall_at_1000
1548
+ value: 97.441
1549
+ - type: recall_at_3
1550
+ value: 40.583999999999996
1551
+ - type: recall_at_5
1552
+ value: 49.992999999999995
1553
+ - task:
1554
+ type: Classification
1555
+ dataset:
1556
+ type: mteb/mtop_domain
1557
+ name: MTEB MTOPDomainClassification (en)
1558
+ config: en
1559
+ split: test
1560
+ revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
1561
+ metrics:
1562
+ - type: accuracy
1563
+ value: 93.36069311445507
1564
+ - type: f1
1565
+ value: 93.16456330371453
1566
+ - task:
1567
+ type: Classification
1568
+ dataset:
1569
+ type: mteb/mtop_intent
1570
+ name: MTEB MTOPIntentClassification (en)
1571
+ config: en
1572
+ split: test
1573
+ revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
1574
+ metrics:
1575
+ - type: accuracy
1576
+ value: 74.74692202462381
1577
+ - type: f1
1578
+ value: 58.17903579421599
1579
+ - task:
1580
+ type: Classification
1581
+ dataset:
1582
+ type: mteb/amazon_massive_intent
1583
+ name: MTEB MassiveIntentClassification (en)
1584
+ config: en
1585
+ split: test
1586
+ revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
1587
+ metrics:
1588
+ - type: accuracy
1589
+ value: 74.80833893745796
1590
+ - type: f1
1591
+ value: 72.70786592684664
1592
+ - task:
1593
+ type: Classification
1594
+ dataset:
1595
+ type: mteb/amazon_massive_scenario
1596
+ name: MTEB MassiveScenarioClassification (en)
1597
+ config: en
1598
+ split: test
1599
+ revision: 7d571f92784cd94a019292a1f45445077d0ef634
1600
+ metrics:
1601
+ - type: accuracy
1602
+ value: 78.69872225958305
1603
+ - type: f1
1604
+ value: 78.61626934504731
1605
+ - task:
1606
+ type: Clustering
1607
+ dataset:
1608
+ type: mteb/medrxiv-clustering-p2p
1609
+ name: MTEB MedrxivClusteringP2P
1610
+ config: default
1611
+ split: test
1612
+ revision: e7a26af6f3ae46b30dde8737f02c07b1505bcc73
1613
+ metrics:
1614
+ - type: v_measure
1615
+ value: 33.058658628717694
1616
+ - task:
1617
+ type: Clustering
1618
+ dataset:
1619
+ type: mteb/medrxiv-clustering-s2s
1620
+ name: MTEB MedrxivClusteringS2S
1621
+ config: default
1622
+ split: test
1623
+ revision: 35191c8c0dca72d8ff3efcd72aa802307d469663
1624
+ metrics:
1625
+ - type: v_measure
1626
+ value: 30.85561739360599
1627
+ - task:
1628
+ type: Reranking
1629
+ dataset:
1630
+ type: mteb/mind_small
1631
+ name: MTEB MindSmallReranking
1632
+ config: default
1633
+ split: test
1634
+ revision: 3bdac13927fdc888b903db93b2ffdbd90b295a69
1635
+ metrics:
1636
+ - type: map
1637
+ value: 31.290259910144385
1638
+ - type: mrr
1639
+ value: 32.44223046102856
1640
+ - task:
1641
+ type: Retrieval
1642
+ dataset:
1643
+ type: nfcorpus
1644
+ name: MTEB NFCorpus
1645
+ config: default
1646
+ split: test
1647
+ revision: None
1648
+ metrics:
1649
+ - type: map_at_1
1650
+ value: 5.288
1651
+ - type: map_at_10
1652
+ value: 12.267999999999999
1653
+ - type: map_at_100
1654
+ value: 15.557000000000002
1655
+ - type: map_at_1000
1656
+ value: 16.98
1657
+ - type: map_at_3
1658
+ value: 8.866
1659
+ - type: map_at_5
1660
+ value: 10.418
1661
+ - type: mrr_at_1
1662
+ value: 43.653
1663
+ - type: mrr_at_10
1664
+ value: 52.681
1665
+ - type: mrr_at_100
1666
+ value: 53.315999999999995
1667
+ - type: mrr_at_1000
1668
+ value: 53.357
1669
+ - type: mrr_at_3
1670
+ value: 51.393
1671
+ - type: mrr_at_5
1672
+ value: 51.903999999999996
1673
+ - type: ndcg_at_1
1674
+ value: 42.415000000000006
1675
+ - type: ndcg_at_10
1676
+ value: 34.305
1677
+ - type: ndcg_at_100
1678
+ value: 30.825999999999997
1679
+ - type: ndcg_at_1000
1680
+ value: 39.393
1681
+ - type: ndcg_at_3
1682
+ value: 39.931
1683
+ - type: ndcg_at_5
1684
+ value: 37.519999999999996
1685
+ - type: precision_at_1
1686
+ value: 43.653
1687
+ - type: precision_at_10
1688
+ value: 25.728
1689
+ - type: precision_at_100
1690
+ value: 7.932
1691
+ - type: precision_at_1000
1692
+ value: 2.07
1693
+ - type: precision_at_3
1694
+ value: 38.184000000000005
1695
+ - type: precision_at_5
1696
+ value: 32.879000000000005
1697
+ - type: recall_at_1
1698
+ value: 5.288
1699
+ - type: recall_at_10
1700
+ value: 16.195
1701
+ - type: recall_at_100
1702
+ value: 31.135
1703
+ - type: recall_at_1000
1704
+ value: 61.531000000000006
1705
+ - type: recall_at_3
1706
+ value: 10.313
1707
+ - type: recall_at_5
1708
+ value: 12.754999999999999
1709
+ - task:
1710
+ type: Retrieval
1711
+ dataset:
1712
+ type: nq
1713
+ name: MTEB NQ
1714
+ config: default
1715
+ split: test
1716
+ revision: None
1717
+ metrics:
1718
+ - type: map_at_1
1719
+ value: 28.216
1720
+ - type: map_at_10
1721
+ value: 42.588
1722
+ - type: map_at_100
1723
+ value: 43.702999999999996
1724
+ - type: map_at_1000
1725
+ value: 43.739
1726
+ - type: map_at_3
1727
+ value: 38.177
1728
+ - type: map_at_5
1729
+ value: 40.754000000000005
1730
+ - type: mrr_at_1
1731
+ value: 31.866
1732
+ - type: mrr_at_10
1733
+ value: 45.189
1734
+ - type: mrr_at_100
1735
+ value: 46.056000000000004
1736
+ - type: mrr_at_1000
1737
+ value: 46.081
1738
+ - type: mrr_at_3
1739
+ value: 41.526999999999994
1740
+ - type: mrr_at_5
1741
+ value: 43.704
1742
+ - type: ndcg_at_1
1743
+ value: 31.837
1744
+ - type: ndcg_at_10
1745
+ value: 50.178
1746
+ - type: ndcg_at_100
1747
+ value: 54.98800000000001
1748
+ - type: ndcg_at_1000
1749
+ value: 55.812
1750
+ - type: ndcg_at_3
1751
+ value: 41.853
1752
+ - type: ndcg_at_5
1753
+ value: 46.153
1754
+ - type: precision_at_1
1755
+ value: 31.837
1756
+ - type: precision_at_10
1757
+ value: 8.43
1758
+ - type: precision_at_100
1759
+ value: 1.1119999999999999
1760
+ - type: precision_at_1000
1761
+ value: 0.11900000000000001
1762
+ - type: precision_at_3
1763
+ value: 19.023
1764
+ - type: precision_at_5
1765
+ value: 13.911000000000001
1766
+ - type: recall_at_1
1767
+ value: 28.216
1768
+ - type: recall_at_10
1769
+ value: 70.8
1770
+ - type: recall_at_100
1771
+ value: 91.857
1772
+ - type: recall_at_1000
1773
+ value: 97.941
1774
+ - type: recall_at_3
1775
+ value: 49.196
1776
+ - type: recall_at_5
1777
+ value: 59.072
1778
+ - task:
1779
+ type: Retrieval
1780
+ dataset:
1781
+ type: quora
1782
+ name: MTEB QuoraRetrieval
1783
+ config: default
1784
+ split: test
1785
+ revision: None
1786
+ metrics:
1787
+ - type: map_at_1
1788
+ value: 71.22800000000001
1789
+ - type: map_at_10
1790
+ value: 85.115
1791
+ - type: map_at_100
1792
+ value: 85.72
1793
+ - type: map_at_1000
1794
+ value: 85.737
1795
+ - type: map_at_3
1796
+ value: 82.149
1797
+ - type: map_at_5
1798
+ value: 84.029
1799
+ - type: mrr_at_1
1800
+ value: 81.96
1801
+ - type: mrr_at_10
1802
+ value: 88.00200000000001
1803
+ - type: mrr_at_100
1804
+ value: 88.088
1805
+ - type: mrr_at_1000
1806
+ value: 88.089
1807
+ - type: mrr_at_3
1808
+ value: 87.055
1809
+ - type: mrr_at_5
1810
+ value: 87.715
1811
+ - type: ndcg_at_1
1812
+ value: 82.01
1813
+ - type: ndcg_at_10
1814
+ value: 88.78
1815
+ - type: ndcg_at_100
1816
+ value: 89.91
1817
+ - type: ndcg_at_1000
1818
+ value: 90.013
1819
+ - type: ndcg_at_3
1820
+ value: 85.957
1821
+ - type: ndcg_at_5
1822
+ value: 87.56
1823
+ - type: precision_at_1
1824
+ value: 82.01
1825
+ - type: precision_at_10
1826
+ value: 13.462
1827
+ - type: precision_at_100
1828
+ value: 1.528
1829
+ - type: precision_at_1000
1830
+ value: 0.157
1831
+ - type: precision_at_3
1832
+ value: 37.553
1833
+ - type: precision_at_5
1834
+ value: 24.732000000000003
1835
+ - type: recall_at_1
1836
+ value: 71.22800000000001
1837
+ - type: recall_at_10
1838
+ value: 95.69
1839
+ - type: recall_at_100
1840
+ value: 99.531
1841
+ - type: recall_at_1000
1842
+ value: 99.98
1843
+ - type: recall_at_3
1844
+ value: 87.632
1845
+ - type: recall_at_5
1846
+ value: 92.117
1847
+ - task:
1848
+ type: Clustering
1849
+ dataset:
1850
+ type: mteb/reddit-clustering
1851
+ name: MTEB RedditClustering
1852
+ config: default
1853
+ split: test
1854
+ revision: 24640382cdbf8abc73003fb0fa6d111a705499eb
1855
+ metrics:
1856
+ - type: v_measure
1857
+ value: 52.31768034366916
1858
+ - task:
1859
+ type: Clustering
1860
+ dataset:
1861
+ type: mteb/reddit-clustering-p2p
1862
+ name: MTEB RedditClusteringP2P
1863
+ config: default
1864
+ split: test
1865
+ revision: 282350215ef01743dc01b456c7f5241fa8937f16
1866
+ metrics:
1867
+ - type: v_measure
1868
+ value: 60.640266772723606
1869
+ - task:
1870
+ type: Retrieval
1871
+ dataset:
1872
+ type: scidocs
1873
+ name: MTEB SCIDOCS
1874
+ config: default
1875
+ split: test
1876
+ revision: None
1877
+ metrics:
1878
+ - type: map_at_1
1879
+ value: 4.7780000000000005
1880
+ - type: map_at_10
1881
+ value: 12.299
1882
+ - type: map_at_100
1883
+ value: 14.363000000000001
1884
+ - type: map_at_1000
1885
+ value: 14.71
1886
+ - type: map_at_3
1887
+ value: 8.738999999999999
1888
+ - type: map_at_5
1889
+ value: 10.397
1890
+ - type: mrr_at_1
1891
+ value: 23.599999999999998
1892
+ - type: mrr_at_10
1893
+ value: 34.845
1894
+ - type: mrr_at_100
1895
+ value: 35.916
1896
+ - type: mrr_at_1000
1897
+ value: 35.973
1898
+ - type: mrr_at_3
1899
+ value: 31.7
1900
+ - type: mrr_at_5
1901
+ value: 33.535
1902
+ - type: ndcg_at_1
1903
+ value: 23.599999999999998
1904
+ - type: ndcg_at_10
1905
+ value: 20.522000000000002
1906
+ - type: ndcg_at_100
1907
+ value: 28.737000000000002
1908
+ - type: ndcg_at_1000
1909
+ value: 34.596
1910
+ - type: ndcg_at_3
1911
+ value: 19.542
1912
+ - type: ndcg_at_5
1913
+ value: 16.958000000000002
1914
+ - type: precision_at_1
1915
+ value: 23.599999999999998
1916
+ - type: precision_at_10
1917
+ value: 10.67
1918
+ - type: precision_at_100
1919
+ value: 2.259
1920
+ - type: precision_at_1000
1921
+ value: 0.367
1922
+ - type: precision_at_3
1923
+ value: 18.333
1924
+ - type: precision_at_5
1925
+ value: 14.879999999999999
1926
+ - type: recall_at_1
1927
+ value: 4.7780000000000005
1928
+ - type: recall_at_10
1929
+ value: 21.617
1930
+ - type: recall_at_100
1931
+ value: 45.905
1932
+ - type: recall_at_1000
1933
+ value: 74.42
1934
+ - type: recall_at_3
1935
+ value: 11.148
1936
+ - type: recall_at_5
1937
+ value: 15.082999999999998
1938
+ - task:
1939
+ type: STS
1940
+ dataset:
1941
+ type: mteb/sickr-sts
1942
+ name: MTEB SICK-R
1943
+ config: default
1944
+ split: test
1945
+ revision: a6ea5a8cab320b040a23452cc28066d9beae2cee
1946
+ metrics:
1947
+ - type: cos_sim_pearson
1948
+ value: 83.22372750297885
1949
+ - type: cos_sim_spearman
1950
+ value: 79.40972617119405
1951
+ - type: euclidean_pearson
1952
+ value: 80.6101072020434
1953
+ - type: euclidean_spearman
1954
+ value: 79.53844217225202
1955
+ - type: manhattan_pearson
1956
+ value: 80.57265975286111
1957
+ - type: manhattan_spearman
1958
+ value: 79.46335611792958
1959
+ - task:
1960
+ type: STS
1961
+ dataset:
1962
+ type: mteb/sts12-sts
1963
+ name: MTEB STS12
1964
+ config: default
1965
+ split: test
1966
+ revision: a0d554a64d88156834ff5ae9920b964011b16384
1967
+ metrics:
1968
+ - type: cos_sim_pearson
1969
+ value: 85.43713315520749
1970
+ - type: cos_sim_spearman
1971
+ value: 77.44128693329532
1972
+ - type: euclidean_pearson
1973
+ value: 81.63869928101123
1974
+ - type: euclidean_spearman
1975
+ value: 77.29512977961515
1976
+ - type: manhattan_pearson
1977
+ value: 81.63704185566183
1978
+ - type: manhattan_spearman
1979
+ value: 77.29909412738657
1980
+ - task:
1981
+ type: STS
1982
+ dataset:
1983
+ type: mteb/sts13-sts
1984
+ name: MTEB STS13
1985
+ config: default
1986
+ split: test
1987
+ revision: 7e90230a92c190f1bf69ae9002b8cea547a64cca
1988
+ metrics:
1989
+ - type: cos_sim_pearson
1990
+ value: 81.59451537860527
1991
+ - type: cos_sim_spearman
1992
+ value: 82.97994638856723
1993
+ - type: euclidean_pearson
1994
+ value: 82.89478688288412
1995
+ - type: euclidean_spearman
1996
+ value: 83.58740751053104
1997
+ - type: manhattan_pearson
1998
+ value: 82.69140840941608
1999
+ - type: manhattan_spearman
2000
+ value: 83.33665956040555
2001
+ - task:
2002
+ type: STS
2003
+ dataset:
2004
+ type: mteb/sts14-sts
2005
+ name: MTEB STS14
2006
+ config: default
2007
+ split: test
2008
+ revision: 6031580fec1f6af667f0bd2da0a551cf4f0b2375
2009
+ metrics:
2010
+ - type: cos_sim_pearson
2011
+ value: 82.00756527711764
2012
+ - type: cos_sim_spearman
2013
+ value: 81.83560996841379
2014
+ - type: euclidean_pearson
2015
+ value: 82.07684151976518
2016
+ - type: euclidean_spearman
2017
+ value: 82.00913052060511
2018
+ - type: manhattan_pearson
2019
+ value: 82.05690778488794
2020
+ - type: manhattan_spearman
2021
+ value: 82.02260252019525
2022
+ - task:
2023
+ type: STS
2024
+ dataset:
2025
+ type: mteb/sts15-sts
2026
+ name: MTEB STS15
2027
+ config: default
2028
+ split: test
2029
+ revision: ae752c7c21bf194d8b67fd573edf7ae58183cbe3
2030
+ metrics:
2031
+ - type: cos_sim_pearson
2032
+ value: 86.13710262895447
2033
+ - type: cos_sim_spearman
2034
+ value: 87.26412811156248
2035
+ - type: euclidean_pearson
2036
+ value: 86.94151453230228
2037
+ - type: euclidean_spearman
2038
+ value: 87.5363796699571
2039
+ - type: manhattan_pearson
2040
+ value: 86.86989424083748
2041
+ - type: manhattan_spearman
2042
+ value: 87.47315940781353
2043
+ - task:
2044
+ type: STS
2045
+ dataset:
2046
+ type: mteb/sts16-sts
2047
+ name: MTEB STS16
2048
+ config: default
2049
+ split: test
2050
+ revision: 4d8694f8f0e0100860b497b999b3dbed754a0513
2051
+ metrics:
2052
+ - type: cos_sim_pearson
2053
+ value: 83.0230597603627
2054
+ - type: cos_sim_spearman
2055
+ value: 84.93344499318864
2056
+ - type: euclidean_pearson
2057
+ value: 84.23754743431141
2058
+ - type: euclidean_spearman
2059
+ value: 85.09707376597099
2060
+ - type: manhattan_pearson
2061
+ value: 84.04325160987763
2062
+ - type: manhattan_spearman
2063
+ value: 84.89353071339909
2064
+ - task:
2065
+ type: STS
2066
+ dataset:
2067
+ type: mteb/sts17-crosslingual-sts
2068
+ name: MTEB STS17 (en-en)
2069
+ config: en-en
2070
+ split: test
2071
+ revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
2072
+ metrics:
2073
+ - type: cos_sim_pearson
2074
+ value: 86.75620824563921
2075
+ - type: cos_sim_spearman
2076
+ value: 87.15065513706398
2077
+ - type: euclidean_pearson
2078
+ value: 88.26281533633521
2079
+ - type: euclidean_spearman
2080
+ value: 87.51963738643983
2081
+ - type: manhattan_pearson
2082
+ value: 88.25599267618065
2083
+ - type: manhattan_spearman
2084
+ value: 87.58048736047483
2085
+ - task:
2086
+ type: STS
2087
+ dataset:
2088
+ type: mteb/sts22-crosslingual-sts
2089
+ name: MTEB STS22 (en)
2090
+ config: en
2091
+ split: test
2092
+ revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
2093
+ metrics:
2094
+ - type: cos_sim_pearson
2095
+ value: 64.74645319195137
2096
+ - type: cos_sim_spearman
2097
+ value: 65.29996325037214
2098
+ - type: euclidean_pearson
2099
+ value: 67.04297794086443
2100
+ - type: euclidean_spearman
2101
+ value: 65.43841726694343
2102
+ - type: manhattan_pearson
2103
+ value: 67.39459955690904
2104
+ - type: manhattan_spearman
2105
+ value: 65.92864704413651
2106
+ - task:
2107
+ type: STS
2108
+ dataset:
2109
+ type: mteb/stsbenchmark-sts
2110
+ name: MTEB STSBenchmark
2111
+ config: default
2112
+ split: test
2113
+ revision: b0fddb56ed78048fa8b90373c8a3cfc37b684831
2114
+ metrics:
2115
+ - type: cos_sim_pearson
2116
+ value: 84.31291020270801
2117
+ - type: cos_sim_spearman
2118
+ value: 85.86473738688068
2119
+ - type: euclidean_pearson
2120
+ value: 85.65537275064152
2121
+ - type: euclidean_spearman
2122
+ value: 86.13087454209642
2123
+ - type: manhattan_pearson
2124
+ value: 85.43946955047609
2125
+ - type: manhattan_spearman
2126
+ value: 85.91568175344916
2127
+ - task:
2128
+ type: Reranking
2129
+ dataset:
2130
+ type: mteb/scidocs-reranking
2131
+ name: MTEB SciDocsRR
2132
+ config: default
2133
+ split: test
2134
+ revision: d3c5e1fc0b855ab6097bf1cda04dd73947d7caab
2135
+ metrics:
2136
+ - type: map
2137
+ value: 85.93798118350695
2138
+ - type: mrr
2139
+ value: 95.93536274908824
2140
+ - task:
2141
+ type: Retrieval
2142
+ dataset:
2143
+ type: scifact
2144
+ name: MTEB SciFact
2145
+ config: default
2146
+ split: test
2147
+ revision: None
2148
+ metrics:
2149
+ - type: map_at_1
2150
+ value: 57.594
2151
+ - type: map_at_10
2152
+ value: 66.81899999999999
2153
+ - type: map_at_100
2154
+ value: 67.368
2155
+ - type: map_at_1000
2156
+ value: 67.4
2157
+ - type: map_at_3
2158
+ value: 64.061
2159
+ - type: map_at_5
2160
+ value: 65.47
2161
+ - type: mrr_at_1
2162
+ value: 60.667
2163
+ - type: mrr_at_10
2164
+ value: 68.219
2165
+ - type: mrr_at_100
2166
+ value: 68.655
2167
+ - type: mrr_at_1000
2168
+ value: 68.684
2169
+ - type: mrr_at_3
2170
+ value: 66.22200000000001
2171
+ - type: mrr_at_5
2172
+ value: 67.289
2173
+ - type: ndcg_at_1
2174
+ value: 60.667
2175
+ - type: ndcg_at_10
2176
+ value: 71.275
2177
+ - type: ndcg_at_100
2178
+ value: 73.642
2179
+ - type: ndcg_at_1000
2180
+ value: 74.373
2181
+ - type: ndcg_at_3
2182
+ value: 66.521
2183
+ - type: ndcg_at_5
2184
+ value: 68.581
2185
+ - type: precision_at_1
2186
+ value: 60.667
2187
+ - type: precision_at_10
2188
+ value: 9.433
2189
+ - type: precision_at_100
2190
+ value: 1.0699999999999998
2191
+ - type: precision_at_1000
2192
+ value: 0.11299999999999999
2193
+ - type: precision_at_3
2194
+ value: 25.556
2195
+ - type: precision_at_5
2196
+ value: 16.8
2197
+ - type: recall_at_1
2198
+ value: 57.594
2199
+ - type: recall_at_10
2200
+ value: 83.622
2201
+ - type: recall_at_100
2202
+ value: 94.167
2203
+ - type: recall_at_1000
2204
+ value: 99.667
2205
+ - type: recall_at_3
2206
+ value: 70.64399999999999
2207
+ - type: recall_at_5
2208
+ value: 75.983
2209
+ - task:
2210
+ type: PairClassification
2211
+ dataset:
2212
+ type: mteb/sprintduplicatequestions-pairclassification
2213
+ name: MTEB SprintDuplicateQuestions
2214
+ config: default
2215
+ split: test
2216
+ revision: d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46
2217
+ metrics:
2218
+ - type: cos_sim_accuracy
2219
+ value: 99.85841584158416
2220
+ - type: cos_sim_ap
2221
+ value: 96.66996142314342
2222
+ - type: cos_sim_f1
2223
+ value: 92.83208020050125
2224
+ - type: cos_sim_precision
2225
+ value: 93.06532663316584
2226
+ - type: cos_sim_recall
2227
+ value: 92.60000000000001
2228
+ - type: dot_accuracy
2229
+ value: 99.85841584158416
2230
+ - type: dot_ap
2231
+ value: 96.6775307676576
2232
+ - type: dot_f1
2233
+ value: 92.69289729177312
2234
+ - type: dot_precision
2235
+ value: 94.77533960292581
2236
+ - type: dot_recall
2237
+ value: 90.7
2238
+ - type: euclidean_accuracy
2239
+ value: 99.86138613861387
2240
+ - type: euclidean_ap
2241
+ value: 96.6338454403108
2242
+ - type: euclidean_f1
2243
+ value: 92.92214357937311
2244
+ - type: euclidean_precision
2245
+ value: 93.96728016359918
2246
+ - type: euclidean_recall
2247
+ value: 91.9
2248
+ - type: manhattan_accuracy
2249
+ value: 99.86237623762376
2250
+ - type: manhattan_ap
2251
+ value: 96.60370449645053
2252
+ - type: manhattan_f1
2253
+ value: 92.91177970423253
2254
+ - type: manhattan_precision
2255
+ value: 94.7970863683663
2256
+ - type: manhattan_recall
2257
+ value: 91.10000000000001
2258
+ - type: max_accuracy
2259
+ value: 99.86237623762376
2260
+ - type: max_ap
2261
+ value: 96.6775307676576
2262
+ - type: max_f1
2263
+ value: 92.92214357937311
2264
+ - task:
2265
+ type: Clustering
2266
+ dataset:
2267
+ type: mteb/stackexchange-clustering
2268
+ name: MTEB StackExchangeClustering
2269
+ config: default
2270
+ split: test
2271
+ revision: 6cbc1f7b2bc0622f2e39d2c77fa502909748c259
2272
+ metrics:
2273
+ - type: v_measure
2274
+ value: 60.77977058695198
2275
+ - task:
2276
+ type: Clustering
2277
+ dataset:
2278
+ type: mteb/stackexchange-clustering-p2p
2279
+ name: MTEB StackExchangeClusteringP2P
2280
+ config: default
2281
+ split: test
2282
+ revision: 815ca46b2622cec33ccafc3735d572c266efdb44
2283
+ metrics:
2284
+ - type: v_measure
2285
+ value: 35.2725272535638
2286
+ - task:
2287
+ type: Reranking
2288
+ dataset:
2289
+ type: mteb/stackoverflowdupquestions-reranking
2290
+ name: MTEB StackOverflowDupQuestions
2291
+ config: default
2292
+ split: test
2293
+ revision: e185fbe320c72810689fc5848eb6114e1ef5ec69
2294
+ metrics:
2295
+ - type: map
2296
+ value: 53.64052466362125
2297
+ - type: mrr
2298
+ value: 54.533067014684654
2299
+ - task:
2300
+ type: Summarization
2301
+ dataset:
2302
+ type: mteb/summeval
2303
+ name: MTEB SummEval
2304
+ config: default
2305
+ split: test
2306
+ revision: cda12ad7615edc362dbf25a00fdd61d3b1eaf93c
2307
+ metrics:
2308
+ - type: cos_sim_pearson
2309
+ value: 30.677624219206578
2310
+ - type: cos_sim_spearman
2311
+ value: 30.121368518123447
2312
+ - type: dot_pearson
2313
+ value: 30.69870088041608
2314
+ - type: dot_spearman
2315
+ value: 29.61284927093751
2316
+ - task:
2317
+ type: Retrieval
2318
+ dataset:
2319
+ type: trec-covid
2320
+ name: MTEB TRECCOVID
2321
+ config: default
2322
+ split: test
2323
+ revision: None
2324
+ metrics:
2325
+ - type: map_at_1
2326
+ value: 0.22
2327
+ - type: map_at_10
2328
+ value: 1.855
2329
+ - type: map_at_100
2330
+ value: 9.885
2331
+ - type: map_at_1000
2332
+ value: 23.416999999999998
2333
+ - type: map_at_3
2334
+ value: 0.637
2335
+ - type: map_at_5
2336
+ value: 1.024
2337
+ - type: mrr_at_1
2338
+ value: 88.0
2339
+ - type: mrr_at_10
2340
+ value: 93.067
2341
+ - type: mrr_at_100
2342
+ value: 93.067
2343
+ - type: mrr_at_1000
2344
+ value: 93.067
2345
+ - type: mrr_at_3
2346
+ value: 92.667
2347
+ - type: mrr_at_5
2348
+ value: 93.067
2349
+ - type: ndcg_at_1
2350
+ value: 82.0
2351
+ - type: ndcg_at_10
2352
+ value: 75.899
2353
+ - type: ndcg_at_100
2354
+ value: 55.115
2355
+ - type: ndcg_at_1000
2356
+ value: 48.368
2357
+ - type: ndcg_at_3
2358
+ value: 79.704
2359
+ - type: ndcg_at_5
2360
+ value: 78.39699999999999
2361
+ - type: precision_at_1
2362
+ value: 88.0
2363
+ - type: precision_at_10
2364
+ value: 79.60000000000001
2365
+ - type: precision_at_100
2366
+ value: 56.06
2367
+ - type: precision_at_1000
2368
+ value: 21.206
2369
+ - type: precision_at_3
2370
+ value: 84.667
2371
+ - type: precision_at_5
2372
+ value: 83.2
2373
+ - type: recall_at_1
2374
+ value: 0.22
2375
+ - type: recall_at_10
2376
+ value: 2.078
2377
+ - type: recall_at_100
2378
+ value: 13.297
2379
+ - type: recall_at_1000
2380
+ value: 44.979
2381
+ - type: recall_at_3
2382
+ value: 0.6689999999999999
2383
+ - type: recall_at_5
2384
+ value: 1.106
2385
+ - task:
2386
+ type: Retrieval
2387
+ dataset:
2388
+ type: webis-touche2020
2389
+ name: MTEB Touche2020
2390
+ config: default
2391
+ split: test
2392
+ revision: None
2393
+ metrics:
2394
+ - type: map_at_1
2395
+ value: 2.258
2396
+ - type: map_at_10
2397
+ value: 10.439
2398
+ - type: map_at_100
2399
+ value: 16.89
2400
+ - type: map_at_1000
2401
+ value: 18.407999999999998
2402
+ - type: map_at_3
2403
+ value: 5.668
2404
+ - type: map_at_5
2405
+ value: 7.718
2406
+ - type: mrr_at_1
2407
+ value: 32.653
2408
+ - type: mrr_at_10
2409
+ value: 51.159
2410
+ - type: mrr_at_100
2411
+ value: 51.714000000000006
2412
+ - type: mrr_at_1000
2413
+ value: 51.714000000000006
2414
+ - type: mrr_at_3
2415
+ value: 47.959
2416
+ - type: mrr_at_5
2417
+ value: 50.407999999999994
2418
+ - type: ndcg_at_1
2419
+ value: 29.592000000000002
2420
+ - type: ndcg_at_10
2421
+ value: 26.037
2422
+ - type: ndcg_at_100
2423
+ value: 37.924
2424
+ - type: ndcg_at_1000
2425
+ value: 49.126999999999995
2426
+ - type: ndcg_at_3
2427
+ value: 30.631999999999998
2428
+ - type: ndcg_at_5
2429
+ value: 28.571
2430
+ - type: precision_at_1
2431
+ value: 32.653
2432
+ - type: precision_at_10
2433
+ value: 22.857
2434
+ - type: precision_at_100
2435
+ value: 7.754999999999999
2436
+ - type: precision_at_1000
2437
+ value: 1.529
2438
+ - type: precision_at_3
2439
+ value: 34.014
2440
+ - type: precision_at_5
2441
+ value: 29.796
2442
+ - type: recall_at_1
2443
+ value: 2.258
2444
+ - type: recall_at_10
2445
+ value: 16.554
2446
+ - type: recall_at_100
2447
+ value: 48.439
2448
+ - type: recall_at_1000
2449
+ value: 82.80499999999999
2450
+ - type: recall_at_3
2451
+ value: 7.283
2452
+ - type: recall_at_5
2453
+ value: 10.732
2454
+ - task:
2455
+ type: Classification
2456
+ dataset:
2457
+ type: mteb/toxic_conversations_50k
2458
+ name: MTEB ToxicConversationsClassification
2459
+ config: default
2460
+ split: test
2461
+ revision: d7c0de2777da35d6aae2200a62c6e0e5af397c4c
2462
+ metrics:
2463
+ - type: accuracy
2464
+ value: 69.8858
2465
+ - type: ap
2466
+ value: 13.835684144362109
2467
+ - type: f1
2468
+ value: 53.803351693244586
2469
+ - task:
2470
+ type: Classification
2471
+ dataset:
2472
+ type: mteb/tweet_sentiment_extraction
2473
+ name: MTEB TweetSentimentExtractionClassification
2474
+ config: default
2475
+ split: test
2476
+ revision: d604517c81ca91fe16a244d1248fc021f9ecee7a
2477
+ metrics:
2478
+ - type: accuracy
2479
+ value: 60.50650820599886
2480
+ - type: f1
2481
+ value: 60.84357825979259
2482
+ - task:
2483
+ type: Clustering
2484
+ dataset:
2485
+ type: mteb/twentynewsgroups-clustering
2486
+ name: MTEB TwentyNewsgroupsClustering
2487
+ config: default
2488
+ split: test
2489
+ revision: 6125ec4e24fa026cec8a478383ee943acfbd5449
2490
+ metrics:
2491
+ - type: v_measure
2492
+ value: 48.52131044852134
2493
+ - task:
2494
+ type: PairClassification
2495
+ dataset:
2496
+ type: mteb/twittersemeval2015-pairclassification
2497
+ name: MTEB TwitterSemEval2015
2498
+ config: default
2499
+ split: test
2500
+ revision: 70970daeab8776df92f5ea462b6173c0b46fd2d1
2501
+ metrics:
2502
+ - type: cos_sim_accuracy
2503
+ value: 85.59337187816654
2504
+ - type: cos_sim_ap
2505
+ value: 73.23925826533437
2506
+ - type: cos_sim_f1
2507
+ value: 67.34693877551021
2508
+ - type: cos_sim_precision
2509
+ value: 62.40432237730752
2510
+ - type: cos_sim_recall
2511
+ value: 73.13984168865434
2512
+ - type: dot_accuracy
2513
+ value: 85.31322644096085
2514
+ - type: dot_ap
2515
+ value: 72.30723963807422
2516
+ - type: dot_f1
2517
+ value: 66.47051612112296
2518
+ - type: dot_precision
2519
+ value: 62.0792305930845
2520
+ - type: dot_recall
2521
+ value: 71.53034300791556
2522
+ - type: euclidean_accuracy
2523
+ value: 85.61125350181797
2524
+ - type: euclidean_ap
2525
+ value: 73.32843720487845
2526
+ - type: euclidean_f1
2527
+ value: 67.36549633745895
2528
+ - type: euclidean_precision
2529
+ value: 64.60755813953489
2530
+ - type: euclidean_recall
2531
+ value: 70.36939313984169
2532
+ - type: manhattan_accuracy
2533
+ value: 85.63509566668654
2534
+ - type: manhattan_ap
2535
+ value: 73.16658488311325
2536
+ - type: manhattan_f1
2537
+ value: 67.20597386434349
2538
+ - type: manhattan_precision
2539
+ value: 63.60424028268551
2540
+ - type: manhattan_recall
2541
+ value: 71.2401055408971
2542
+ - type: max_accuracy
2543
+ value: 85.63509566668654
2544
+ - type: max_ap
2545
+ value: 73.32843720487845
2546
+ - type: max_f1
2547
+ value: 67.36549633745895
2548
+ - task:
2549
+ type: PairClassification
2550
+ dataset:
2551
+ type: mteb/twitterurlcorpus-pairclassification
2552
+ name: MTEB TwitterURLCorpus
2553
+ config: default
2554
+ split: test
2555
+ revision: 8b6510b0b1fa4e4c4f879467980e9be563ec1cdf
2556
+ metrics:
2557
+ - type: cos_sim_accuracy
2558
+ value: 88.33779640625606
2559
+ - type: cos_sim_ap
2560
+ value: 84.83868375898157
2561
+ - type: cos_sim_f1
2562
+ value: 77.16506154017773
2563
+ - type: cos_sim_precision
2564
+ value: 74.62064005753327
2565
+ - type: cos_sim_recall
2566
+ value: 79.88912842623961
2567
+ - type: dot_accuracy
2568
+ value: 88.02732176815307
2569
+ - type: dot_ap
2570
+ value: 83.95089283763002
2571
+ - type: dot_f1
2572
+ value: 76.29635101196631
2573
+ - type: dot_precision
2574
+ value: 73.31771720613288
2575
+ - type: dot_recall
2576
+ value: 79.52725592854944
2577
+ - type: euclidean_accuracy
2578
+ value: 88.44452206310397
2579
+ - type: euclidean_ap
2580
+ value: 84.98384576824827
2581
+ - type: euclidean_f1
2582
+ value: 77.29311047696697
2583
+ - type: euclidean_precision
2584
+ value: 74.51232583065381
2585
+ - type: euclidean_recall
2586
+ value: 80.28949799815214
2587
+ - type: manhattan_accuracy
2588
+ value: 88.47362906042613
2589
+ - type: manhattan_ap
2590
+ value: 84.91421462218432
2591
+ - type: manhattan_f1
2592
+ value: 77.05107637204792
2593
+ - type: manhattan_precision
2594
+ value: 74.74484256243214
2595
+ - type: manhattan_recall
2596
+ value: 79.50415768401602
2597
+ - type: max_accuracy
2598
+ value: 88.47362906042613
2599
+ - type: max_ap
2600
+ value: 84.98384576824827
2601
+ - type: max_f1
2602
+ value: 77.29311047696697
2603
+ license: mit
2604
+ language:
2605
+ - en
2606
+ ---
2607
+
2608
+ Finetuned using the same data & library as [WhereIsAI/UAE-Large-V1](https://huggingface.co/WhereIsAI/UAE-Large-V1)
2609
+
2610
+ <h1 align="center">FlagEmbedding</h1>
2611
+
2612
+
2613
+ <h4 align="center">
2614
+ <p>
2615
+ <a href="#model-list">Model List</a> |
2616
+ <a href="#frequently-asked-questions">FAQ</a> |
2617
+ <a href="#usage">Usage</a> |
2618
+ <a href="#evaluation">Evaluation</a> |
2619
+ <a href="#train">Train</a> |
2620
+ <a href="#contact">Contact</a> |
2621
+ <a href="#citation">Citation</a> |
2622
+ <a href="#license">License</a>
2623
+ </p>
2624
+ </h4>
2625
+
2626
+ More details please refer to our Github: [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding).
2627
+
2628
+
2629
+ [English](README.md) | [中文](https://github.com/FlagOpen/FlagEmbedding/blob/master/README_zh.md)
2630
+
2631
+ FlagEmbedding can map any text to a low-dimensional dense vector which can be used for tasks like retrieval, classification, clustering, or semantic search.
2632
+ And it also can be used in vector databases for LLMs.
2633
+
2634
+ ************* 🌟**Updates**🌟 *************
2635
+ - 10/12/2023: Release [LLM-Embedder](./FlagEmbedding/llm_embedder/README.md), a unified embedding model to support diverse retrieval augmentation needs for LLMs. [Paper](https://arxiv.org/pdf/2310.07554.pdf) :fire:
2636
+ - 09/15/2023: The [technical report](https://arxiv.org/pdf/2309.07597.pdf) of BGE has been released
2637
+ - 09/15/2023: The [massive training data](https://data.baai.ac.cn/details/BAAI-MTP) of BGE has been released
2638
+ - 09/12/2023: New models:
2639
+ - **New reranker model**: release cross-encoder models `BAAI/bge-reranker-base` and `BAAI/bge-reranker-large`, which are more powerful than embedding model. We recommend to use/fine-tune them to re-rank top-k documents returned by embedding models.
2640
+ - **update embedding model**: release `bge-*-v1.5` embedding model to alleviate the issue of the similarity distribution, and enhance its retrieval ability without instruction.
2641
+
2642
+
2643
+ <details>
2644
+ <summary>More</summary>
2645
+ <!-- ### More -->
2646
+
2647
+ - 09/07/2023: Update [fine-tune code](https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md): Add script to mine hard negatives and support adding instruction during fine-tuning.
2648
+ - 08/09/2023: BGE Models are integrated into **Langchain**, you can use it like [this](#using-langchain); C-MTEB **leaderboard** is [available](https://huggingface.co/spaces/mteb/leaderboard).
2649
+ - 08/05/2023: Release base-scale and small-scale models, **best performance among the models of the same size 🤗**
2650
+ - 08/02/2023: Release `bge-large-*`(short for BAAI General Embedding) Models, **rank 1st on MTEB and C-MTEB benchmark!** :tada: :tada:
2651
+ - 08/01/2023: We release the [Chinese Massive Text Embedding Benchmark](https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB) (**C-MTEB**), consisting of 31 test datasets.
2652
+
2653
+ </details>
2654
+
2655
+
2656
+ ## Model List
2657
+
2658
+ `bge` is short for `BAAI general embedding`.
2659
+
2660
+ | Model | Language | | Description | query instruction for retrieval [1] |
2661
+ |:-------------------------------|:--------:| :--------:| :--------:|:--------:|
2662
+ | [BAAI/llm-embedder](https://huggingface.co/BAAI/llm-embedder) | English | [Inference](./FlagEmbedding/llm_embedder/README.md) [Fine-tune](./FlagEmbedding/llm_embedder/README.md) | a unified embedding model to support diverse retrieval augmentation needs for LLMs | See [README](./FlagEmbedding/llm_embedder/README.md) |
2663
+ | [BAAI/bge-reranker-large](https://huggingface.co/BAAI/bge-reranker-large) | Chinese and English | [Inference](#usage-for-reranker) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/reranker) | a cross-encoder model which is more accurate but less efficient [2] | |
2664
+ | [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | Chinese and English | [Inference](#usage-for-reranker) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/reranker) | a cross-encoder model which is more accurate but less efficient [2] | |
2665
+ | [BAAI/bge-large-en-v1.5](https://huggingface.co/BAAI/bge-large-en-v1.5) | English | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | version 1.5 with more reasonable similarity distribution | `Represent this sentence for searching relevant passages: ` |
2666
+ | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | English | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | version 1.5 with more reasonable similarity distribution | `Represent this sentence for searching relevant passages: ` |
2667
+ | [BAAI/bge-small-en-v1.5](https://huggingface.co/BAAI/bge-small-en-v1.5) | English | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | version 1.5 with more reasonable similarity distribution | `Represent this sentence for searching relevant passages: ` |
2668
+ | [BAAI/bge-large-zh-v1.5](https://huggingface.co/BAAI/bge-large-zh-v1.5) | Chinese | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | version 1.5 with more reasonable similarity distribution | `为这个句子生成表示以用于检索相关文章:` |
2669
+ | [BAAI/bge-base-zh-v1.5](https://huggingface.co/BAAI/bge-base-zh-v1.5) | Chinese | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | version 1.5 with more reasonable similarity distribution | `为这个句子生成表示以用于检索相关文章:` |
2670
+ | [BAAI/bge-small-zh-v1.5](https://huggingface.co/BAAI/bge-small-zh-v1.5) | Chinese | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | version 1.5 with more reasonable similarity distribution | `为这个句子生成表示以用于检索相关文章:` |
2671
+ | [BAAI/bge-large-en](https://huggingface.co/BAAI/bge-large-en) | English | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | :trophy: rank **1st** in [MTEB](https://huggingface.co/spaces/mteb/leaderboard) leaderboard | `Represent this sentence for searching relevant passages: ` |
2672
+ | [BAAI/bge-base-en](https://huggingface.co/BAAI/bge-base-en) | English | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | a base-scale model but with similar ability to `bge-large-en` | `Represent this sentence for searching relevant passages: ` |
2673
+ | [BAAI/bge-small-en](https://huggingface.co/BAAI/bge-small-en) | English | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) |a small-scale model but with competitive performance | `Represent this sentence for searching relevant passages: ` |
2674
+ | [BAAI/bge-large-zh](https://huggingface.co/BAAI/bge-large-zh) | Chinese | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | :trophy: rank **1st** in [C-MTEB](https://github.com/FlagOpen/FlagEmbedding/tree/master/C_MTEB) benchmark | `为这个句子生成表示以用于检索相关文章:` |
2675
+ | [BAAI/bge-base-zh](https://huggingface.co/BAAI/bge-base-zh) | Chinese | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | a base-scale model but with similar ability to `bge-large-zh` | `为这个句子生成表示以用于检索相关文章:` |
2676
+ | [BAAI/bge-small-zh](https://huggingface.co/BAAI/bge-small-zh) | Chinese | [Inference](#usage-for-embedding-model) [Fine-tune](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) | a small-scale model but with competitive performance | `为这个句子生成表示以用于检索相关文章:` |
2677
+
2678
+
2679
+ [1\]: If you need to search the relevant passages to a query, we suggest to add the instruction to the query; in other cases, no instruction is needed, just use the original query directly. In all cases, **no instruction** needs to be added to passages.
2680
+
2681
+ [2\]: Different from embedding model, reranker uses question and document as input and directly outputs similarity instead of embedding. To balance the accuracy and time cost, cross-encoder is widely used to re-rank top-k documents retrieved by other simple models.
2682
+ For examples, use bge embedding model to retrieve top 100 relevant documents, and then use bge reranker to re-rank the top 100 document to get the final top-3 results.
2683
+
2684
+ All models have been uploaded to Huggingface Hub, and you can see them at https://huggingface.co/BAAI.
2685
+ If you cannot open the Huggingface Hub, you also can download the models at https://model.baai.ac.cn/models .
2686
+
2687
+
2688
+ ## Frequently asked questions
2689
+
2690
+ <details>
2691
+ <summary>1. How to fine-tune bge embedding model?</summary>
2692
+
2693
+ <!-- ### How to fine-tune bge embedding model? -->
2694
+ Following this [example](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune) to prepare data and fine-tune your model.
2695
+ Some suggestions:
2696
+ - Mine hard negatives following this [example](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune#hard-negatives), which can improve the retrieval performance.
2697
+ - If you pre-train bge on your data, the pre-trained model cannot be directly used to calculate similarity, and it must be fine-tuned with contrastive learning before computing similarity.
2698
+ - If the accuracy of the fine-tuned model is still not high, it is recommended to use/fine-tune the cross-encoder model (bge-reranker) to re-rank top-k results. Hard negatives also are needed to fine-tune reranker.
2699
+
2700
+
2701
+ </details>
2702
+
2703
+ <details>
2704
+ <summary>2. The similarity score between two dissimilar sentences is higher than 0.5</summary>
2705
+
2706
+ <!-- ### The similarity score between two dissimilar sentences is higher than 0.5 -->
2707
+ **Suggest to use bge v1.5, which alleviates the issue of the similarity distribution.**
2708
+
2709
+ Since we finetune the models by contrastive learning with a temperature of 0.01,
2710
+ the similarity distribution of the current BGE model is about in the interval \[0.6, 1\].
2711
+ So a similarity score greater than 0.5 does not indicate that the two sentences are similar.
2712
+
2713
+ For downstream tasks, such as passage retrieval or semantic similarity,
2714
+ **what matters is the relative order of the scores, not the absolute value.**
2715
+ If you need to filter similar sentences based on a similarity threshold,
2716
+ please select an appropriate similarity threshold based on the similarity distribution on your data (such as 0.8, 0.85, or even 0.9).
2717
+
2718
+ </details>
2719
+
2720
+ <details>
2721
+ <summary>3. When does the query instruction need to be used</summary>
2722
+
2723
+ <!-- ### When does the query instruction need to be used -->
2724
+
2725
+ For the `bge-*-v1.5`, we improve its retrieval ability when not using instruction.
2726
+ No instruction only has a slight degradation in retrieval performance compared with using instruction.
2727
+ So you can generate embedding without instruction in all cases for convenience.
2728
+
2729
+ For a retrieval task that uses short queries to find long related documents,
2730
+ it is recommended to add instructions for these short queries.
2731
+ **The best method to decide whether to add instructions for queries is choosing the setting that achieves better performance on your task.**
2732
+ In all cases, the documents/passages do not need to add the instruction.
2733
+
2734
+ </details>
2735
+
2736
+
2737
+ ## Usage
2738
+
2739
+ ### Usage for Embedding Model
2740
+
2741
+ Here are some examples for using `bge` models with
2742
+ [FlagEmbedding](#using-flagembedding), [Sentence-Transformers](#using-sentence-transformers), [Langchain](#using-langchain), or [Huggingface Transformers](#using-huggingface-transformers).
2743
+
2744
+ #### Using FlagEmbedding
2745
+ ```
2746
+ pip install -U FlagEmbedding
2747
+ ```
2748
+ If it doesn't work for you, you can see [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md) for more methods to install FlagEmbedding.
2749
+
2750
+ ```python
2751
+ from FlagEmbedding import FlagModel
2752
+ sentences_1 = ["样例数据-1", "样例数据-2"]
2753
+ sentences_2 = ["样例数据-3", "样例数据-4"]
2754
+ model = FlagModel('BAAI/bge-large-zh-v1.5',
2755
+ query_instruction_for_retrieval="为这个句子生成表示以用于检索相关文章:",
2756
+ use_fp16=True) # Setting use_fp16 to True speeds up computation with a slight performance degradation
2757
+ embeddings_1 = model.encode(sentences_1)
2758
+ embeddings_2 = model.encode(sentences_2)
2759
+ similarity = embeddings_1 @ embeddings_2.T
2760
+ print(similarity)
2761
+
2762
+ # for s2p(short query to long passage) retrieval task, suggest to use encode_queries() which will automatically add the instruction to each query
2763
+ # corpus in retrieval task can still use encode() or encode_corpus(), since they don't need instruction
2764
+ queries = ['query_1', 'query_2']
2765
+ passages = ["样例文档-1", "样例文档-2"]
2766
+ q_embeddings = model.encode_queries(queries)
2767
+ p_embeddings = model.encode(passages)
2768
+ scores = q_embeddings @ p_embeddings.T
2769
+ ```
2770
+ For the value of the argument `query_instruction_for_retrieval`, see [Model List](https://github.com/FlagOpen/FlagEmbedding/tree/master#model-list).
2771
+
2772
+ By default, FlagModel will use all available GPUs when encoding. Please set `os.environ["CUDA_VISIBLE_DEVICES"]` to select specific GPUs.
2773
+ You also can set `os.environ["CUDA_VISIBLE_DEVICES"]=""` to make all GPUs unavailable.
2774
+
2775
+
2776
+ #### Using Sentence-Transformers
2777
+
2778
+ You can also use the `bge` models with [sentence-transformers](https://www.SBERT.net):
2779
+
2780
+ ```
2781
+ pip install -U sentence-transformers
2782
+ ```
2783
+ ```python
2784
+ from sentence_transformers import SentenceTransformer
2785
+ sentences_1 = ["样例数据-1", "样例数据-2"]
2786
+ sentences_2 = ["样例数据-3", "样例数据-4"]
2787
+ model = SentenceTransformer('BAAI/bge-large-zh-v1.5')
2788
+ embeddings_1 = model.encode(sentences_1, normalize_embeddings=True)
2789
+ embeddings_2 = model.encode(sentences_2, normalize_embeddings=True)
2790
+ similarity = embeddings_1 @ embeddings_2.T
2791
+ print(similarity)
2792
+ ```
2793
+ For s2p(short query to long passage) retrieval task,
2794
+ each short query should start with an instruction (instructions see [Model List](https://github.com/FlagOpen/FlagEmbedding/tree/master#model-list)).
2795
+ But the instruction is not needed for passages.
2796
+ ```python
2797
+ from sentence_transformers import SentenceTransformer
2798
+ queries = ['query_1', 'query_2']
2799
+ passages = ["样例文档-1", "样例文档-2"]
2800
+ instruction = "为这个句子生成表示以用于检索相关文章:"
2801
+
2802
+ model = SentenceTransformer('BAAI/bge-large-zh-v1.5')
2803
+ q_embeddings = model.encode([instruction+q for q in queries], normalize_embeddings=True)
2804
+ p_embeddings = model.encode(passages, normalize_embeddings=True)
2805
+ scores = q_embeddings @ p_embeddings.T
2806
+ ```
2807
+
2808
+ #### Using Langchain
2809
+
2810
+ You can use `bge` in langchain like this:
2811
+ ```python
2812
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
2813
+ model_name = "BAAI/bge-large-en-v1.5"
2814
+ model_kwargs = {'device': 'cuda'}
2815
+ encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity
2816
+ model = HuggingFaceBgeEmbeddings(
2817
+ model_name=model_name,
2818
+ model_kwargs=model_kwargs,
2819
+ encode_kwargs=encode_kwargs,
2820
+ query_instruction="为这个句子生成表示以用于检索相关文章:"
2821
+ )
2822
+ model.query_instruction = "为这个句子生成表示以用于检索相关文章:"
2823
+ ```
2824
+
2825
+
2826
+ #### Using HuggingFace Transformers
2827
+
2828
+ With the transformers package, you can use the model like this: First, you pass your input through the transformer model, then you select the last hidden state of the first token (i.e., [CLS]) as the sentence embedding.
2829
+
2830
+ ```python
2831
+ from transformers import AutoTokenizer, AutoModel
2832
+ import torch
2833
+ # Sentences we want sentence embeddings for
2834
+ sentences = ["样例数据-1", "样例数据-2"]
2835
+
2836
+ # Load model from HuggingFace Hub
2837
+ tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-large-zh-v1.5')
2838
+ model = AutoModel.from_pretrained('BAAI/bge-large-zh-v1.5')
2839
+ model.eval()
2840
+
2841
+ # Tokenize sentences
2842
+ encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
2843
+ # for s2p(short query to long passage) retrieval task, add an instruction to query (not add instruction for passages)
2844
+ # encoded_input = tokenizer([instruction + q for q in queries], padding=True, truncation=True, return_tensors='pt')
2845
+
2846
+ # Compute token embeddings
2847
+ with torch.no_grad():
2848
+ model_output = model(**encoded_input)
2849
+ # Perform pooling. In this case, cls pooling.
2850
+ sentence_embeddings = model_output[0][:, 0]
2851
+ # normalize embeddings
2852
+ sentence_embeddings = torch.nn.functional.normalize(sentence_embeddings, p=2, dim=1)
2853
+ print("Sentence embeddings:", sentence_embeddings)
2854
+ ```
2855
+
2856
+ ### Usage for Reranker
2857
+
2858
+ Different from embedding model, reranker uses question and document as input and directly output similarity instead of embedding.
2859
+ You can get a relevance score by inputting query and passage to the reranker.
2860
+ The reranker is optimized based on cross-entropy loss, so the relevance score is not bounded to a specific range.
2861
+
2862
+
2863
+ #### Using FlagEmbedding
2864
+ ```
2865
+ pip install -U FlagEmbedding
2866
+ ```
2867
+
2868
+ Get relevance scores (higher scores indicate more relevance):
2869
+ ```python
2870
+ from FlagEmbedding import FlagReranker
2871
+ reranker = FlagReranker('BAAI/bge-reranker-large', use_fp16=True) # Setting use_fp16 to True speeds up computation with a slight performance degradation
2872
+
2873
+ score = reranker.compute_score(['query', 'passage'])
2874
+ print(score)
2875
+
2876
+ scores = reranker.compute_score([['what is panda?', 'hi'], ['what is panda?', 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.']])
2877
+ print(scores)
2878
+ ```
2879
+
2880
+
2881
+ #### Using Huggingface transformers
2882
+
2883
+ ```python
2884
+ import torch
2885
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
2886
+
2887
+ tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-reranker-large')
2888
+ model = AutoModelForSequenceClassification.from_pretrained('BAAI/bge-reranker-large')
2889
+ model.eval()
2890
+
2891
+ pairs = [['what is panda?', 'hi'], ['what is panda?', 'The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.']]
2892
+ with torch.no_grad():
2893
+ inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors='pt', max_length=512)
2894
+ scores = model(**inputs, return_dict=True).logits.view(-1, ).float()
2895
+ print(scores)
2896
+ ```
2897
+
2898
+ ## Evaluation
2899
+
2900
+ `baai-general-embedding` models achieve **state-of-the-art performance on both MTEB and C-MTEB leaderboard!**
2901
+ For more details and evaluation tools see our [scripts](https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/README.md).
2902
+
2903
+ - **MTEB**:
2904
+
2905
+ | Model Name | Dimension | Sequence Length | Average (56) | Retrieval (15) |Clustering (11) | Pair Classification (3) | Reranking (4) | STS (10) | Summarization (1) | Classification (12) |
2906
+ |:----:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
2907
+ | [BAAI/bge-large-en-v1.5](https://huggingface.co/BAAI/bge-large-en-v1.5) | 1024 | 512 | **64.23** | **54.29** | 46.08 | 87.12 | 60.03 | 83.11 | 31.61 | 75.97 |
2908
+ | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | 768 | 512 | 63.55 | 53.25 | 45.77 | 86.55 | 58.86 | 82.4 | 31.07 | 75.53 |
2909
+ | [BAAI/bge-small-en-v1.5](https://huggingface.co/BAAI/bge-small-en-v1.5) | 384 | 512 | 62.17 |51.68 | 43.82 | 84.92 | 58.36 | 81.59 | 30.12 | 74.14 |
2910
+ | [bge-large-en](https://huggingface.co/BAAI/bge-large-en) | 1024 | 512 | 63.98 | 53.9 | 46.98 | 85.8 | 59.48 | 81.56 | 32.06 | 76.21 |
2911
+ | [bge-base-en](https://huggingface.co/BAAI/bge-base-en) | 768 | 512 | 63.36 | 53.0 | 46.32 | 85.86 | 58.7 | 81.84 | 29.27 | 75.27 |
2912
+ | [gte-large](https://huggingface.co/thenlper/gte-large) | 1024 | 512 | 63.13 | 52.22 | 46.84 | 85.00 | 59.13 | 83.35 | 31.66 | 73.33 |
2913
+ | [gte-base](https://huggingface.co/thenlper/gte-base) | 768 | 512 | 62.39 | 51.14 | 46.2 | 84.57 | 58.61 | 82.3 | 31.17 | 73.01 |
2914
+ | [e5-large-v2](https://huggingface.co/intfloat/e5-large-v2) | 1024| 512 | 62.25 | 50.56 | 44.49 | 86.03 | 56.61 | 82.05 | 30.19 | 75.24 |
2915
+ | [bge-small-en](https://huggingface.co/BAAI/bge-small-en) | 384 | 512 | 62.11 | 51.82 | 44.31 | 83.78 | 57.97 | 80.72 | 30.53 | 74.37 |
2916
+ | [instructor-xl](https://huggingface.co/hkunlp/instructor-xl) | 768 | 512 | 61.79 | 49.26 | 44.74 | 86.62 | 57.29 | 83.06 | 32.32 | 61.79 |
2917
+ | [e5-base-v2](https://huggingface.co/intfloat/e5-base-v2) | 768 | 512 | 61.5 | 50.29 | 43.80 | 85.73 | 55.91 | 81.05 | 30.28 | 73.84 |
2918
+ | [gte-small](https://huggingface.co/thenlper/gte-small) | 384 | 512 | 61.36 | 49.46 | 44.89 | 83.54 | 57.7 | 82.07 | 30.42 | 72.31 |
2919
+ | [text-embedding-ada-002](https://platform.openai.com/docs/guides/embeddings) | 1536 | 8192 | 60.99 | 49.25 | 45.9 | 84.89 | 56.32 | 80.97 | 30.8 | 70.93 |
2920
+ | [e5-small-v2](https://huggingface.co/intfloat/e5-base-v2) | 384 | 512 | 59.93 | 49.04 | 39.92 | 84.67 | 54.32 | 80.39 | 31.16 | 72.94 |
2921
+ | [sentence-t5-xxl](https://huggingface.co/sentence-transformers/sentence-t5-xxl) | 768 | 512 | 59.51 | 42.24 | 43.72 | 85.06 | 56.42 | 82.63 | 30.08 | 73.42 |
2922
+ | [all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) | 768 | 514 | 57.78 | 43.81 | 43.69 | 83.04 | 59.36 | 80.28 | 27.49 | 65.07 |
2923
+ | [sgpt-bloom-7b1-msmarco](https://huggingface.co/bigscience/sgpt-bloom-7b1-msmarco) | 4096 | 2048 | 57.59 | 48.22 | 38.93 | 81.9 | 55.65 | 77.74 | 33.6 | 66.19 |
2924
+
2925
+
2926
+
2927
+ - **C-MTEB**:
2928
+ We create the benchmark C-MTEB for Chinese text embedding which consists of 31 datasets from 6 tasks.
2929
+ Please refer to [C_MTEB](https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/README.md) for a detailed introduction.
2930
+
2931
+ | Model | Embedding dimension | Avg | Retrieval | STS | PairClassification | Classification | Reranking | Clustering |
2932
+ |:-------------------------------|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
2933
+ | [**BAAI/bge-large-zh-v1.5**](https://huggingface.co/BAAI/bge-large-zh-v1.5) | 1024 | **64.53** | 70.46 | 56.25 | 81.6 | 69.13 | 65.84 | 48.99 |
2934
+ | [BAAI/bge-base-zh-v1.5](https://huggingface.co/BAAI/bge-base-zh-v1.5) | 768 | 63.13 | 69.49 | 53.72 | 79.75 | 68.07 | 65.39 | 47.53 |
2935
+ | [BAAI/bge-small-zh-v1.5](https://huggingface.co/BAAI/bge-small-zh-v1.5) | 512 | 57.82 | 61.77 | 49.11 | 70.41 | 63.96 | 60.92 | 44.18 |
2936
+ | [BAAI/bge-large-zh](https://huggingface.co/BAAI/bge-large-zh) | 1024 | 64.20 | 71.53 | 54.98 | 78.94 | 68.32 | 65.11 | 48.39 |
2937
+ | [bge-large-zh-noinstruct](https://huggingface.co/BAAI/bge-large-zh-noinstruct) | 1024 | 63.53 | 70.55 | 53 | 76.77 | 68.58 | 64.91 | 50.01 |
2938
+ | [BAAI/bge-base-zh](https://huggingface.co/BAAI/bge-base-zh) | 768 | 62.96 | 69.53 | 54.12 | 77.5 | 67.07 | 64.91 | 47.63 |
2939
+ | [multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large) | 1024 | 58.79 | 63.66 | 48.44 | 69.89 | 67.34 | 56.00 | 48.23 |
2940
+ | [BAAI/bge-small-zh](https://huggingface.co/BAAI/bge-small-zh) | 512 | 58.27 | 63.07 | 49.45 | 70.35 | 63.64 | 61.48 | 45.09 |
2941
+ | [m3e-base](https://huggingface.co/moka-ai/m3e-base) | 768 | 57.10 | 56.91 | 50.47 | 63.99 | 67.52 | 59.34 | 47.68 |
2942
+ | [m3e-large](https://huggingface.co/moka-ai/m3e-large) | 1024 | 57.05 | 54.75 | 50.42 | 64.3 | 68.2 | 59.66 | 48.88 |
2943
+ | [multilingual-e5-base](https://huggingface.co/intfloat/multilingual-e5-base) | 768 | 55.48 | 61.63 | 46.49 | 67.07 | 65.35 | 54.35 | 40.68 |
2944
+ | [multilingual-e5-small](https://huggingface.co/intfloat/multilingual-e5-small) | 384 | 55.38 | 59.95 | 45.27 | 66.45 | 65.85 | 53.86 | 45.26 |
2945
+ | [text-embedding-ada-002(OpenAI)](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings) | 1536 | 53.02 | 52.0 | 43.35 | 69.56 | 64.31 | 54.28 | 45.68 |
2946
+ | [luotuo](https://huggingface.co/silk-road/luotuo-bert-medium) | 1024 | 49.37 | 44.4 | 42.78 | 66.62 | 61 | 49.25 | 44.39 |
2947
+ | [text2vec-base](https://huggingface.co/shibing624/text2vec-base-chinese) | 768 | 47.63 | 38.79 | 43.41 | 67.41 | 62.19 | 49.45 | 37.66 |
2948
+ | [text2vec-large](https://huggingface.co/GanymedeNil/text2vec-large-chinese) | 1024 | 47.36 | 41.94 | 44.97 | 70.86 | 60.66 | 49.16 | 30.02 |
2949
+
2950
+
2951
+ - **Reranking**:
2952
+ See [C_MTEB](https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/) for evaluation script.
2953
+
2954
+ | Model | T2Reranking | T2RerankingZh2En\* | T2RerankingEn2Zh\* | MMarcoReranking | CMedQAv1 | CMedQAv2 | Avg |
2955
+ |:-------------------------------|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
2956
+ | text2vec-base-multilingual | 64.66 | 62.94 | 62.51 | 14.37 | 48.46 | 48.6 | 50.26 |
2957
+ | multilingual-e5-small | 65.62 | 60.94 | 56.41 | 29.91 | 67.26 | 66.54 | 57.78 |
2958
+ | multilingual-e5-large | 64.55 | 61.61 | 54.28 | 28.6 | 67.42 | 67.92 | 57.4 |
2959
+ | multilingual-e5-base | 64.21 | 62.13 | 54.68 | 29.5 | 66.23 | 66.98 | 57.29 |
2960
+ | m3e-base | 66.03 | 62.74 | 56.07 | 17.51 | 77.05 | 76.76 | 59.36 |
2961
+ | m3e-large | 66.13 | 62.72 | 56.1 | 16.46 | 77.76 | 78.27 | 59.57 |
2962
+ | bge-base-zh-v1.5 | 66.49 | 63.25 | 57.02 | 29.74 | 80.47 | 84.88 | 63.64 |
2963
+ | bge-large-zh-v1.5 | 65.74 | 63.39 | 57.03 | 28.74 | 83.45 | 85.44 | 63.97 |
2964
+ | [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | 67.28 | 63.95 | 60.45 | 35.46 | 81.26 | 84.1 | 65.42 |
2965
+ | [BAAI/bge-reranker-large](https://huggingface.co/BAAI/bge-reranker-large) | 67.6 | 64.03 | 61.44 | 37.16 | 82.15 | 84.18 | 66.09 |
2966
+
2967
+ \* : T2RerankingZh2En and T2RerankingEn2Zh are cross-language retrieval tasks
2968
+
2969
+ ## Train
2970
+
2971
+ ### BAAI Embedding
2972
+
2973
+ We pre-train the models using [retromae](https://github.com/staoxiao/RetroMAE) and train them on large-scale pairs data using contrastive learning.
2974
+ **You can fine-tune the embedding model on your data following our [examples](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune).**
2975
+ We also provide a [pre-train example](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/pretrain).
2976
+ Note that the goal of pre-training is to reconstruct the text, and the pre-trained model cannot be used for similarity calculation directly, it needs to be fine-tuned.
2977
+ More training details for bge see [baai_general_embedding](https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md).
2978
+
2979
+
2980
+
2981
+ ### BGE Reranker
2982
+
2983
+ Cross-encoder will perform full-attention over the input pair,
2984
+ which is more accurate than embedding model (i.e., bi-encoder) but more time-consuming than embedding model.
2985
+ Therefore, it can be used to re-rank the top-k documents returned by embedding model.
2986
+ We train the cross-encoder on multilingual pair data.
2987
+ The data format is the same as for the embedding model, so you can fine-tune it easily following our [example](https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/reranker).
2988
+ More details please refer to [./FlagEmbedding/reranker/README.md](https://github.com/FlagOpen/FlagEmbedding/tree/master/FlagEmbedding/reranker)
2989
+
2990
+
2991
+ ## Contact
2992
+ If you have any question or suggestion related to this project, feel free to open an issue or pull request.
2993
+ You also can email Shitao Xiao(stxiao@baai.ac.cn) and Zheng Liu(liuzheng@baai.ac.cn).
2994
+
2995
+
2996
+ ## Citation
2997
+
2998
+ If you find this repository useful, please consider giving a star :star: and citation
2999
+
3000
+ ```
3001
+ @misc{bge_embedding,
3002
+ title={C-Pack: Packaged Resources To Advance General Chinese Embedding},
3003
+ author={Shitao Xiao and Zheng Liu and Peitian Zhang and Niklas Muennighoff},
3004
+ year={2023},
3005
+ eprint={2309.07597},
3006
+ archivePrefix={arXiv},
3007
+ primaryClass={cs.CL}
3008
+ }
3009
+ ```
3010
+
3011
+ ## License
3012
+ FlagEmbedding is licensed under the [MIT License](https://github.com/FlagOpen/FlagEmbedding/blob/master/LICENSE). The released models can be used for commercial purposes free of charge.
snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/jupyter-wb536061/.cache/torch/sentence_transformers/khoa-klaytn_bge-small-en-v1.5-angle/",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 384,
11
+ "id2label": {
12
+ "0": "LABEL_0"
13
+ },
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 1536,
16
+ "label2id": {
17
+ "LABEL_0": 0
18
+ },
19
+ "layer_norm_eps": 1e-12,
20
+ "max_position_embeddings": 512,
21
+ "model_type": "bert",
22
+ "num_attention_heads": 12,
23
+ "num_hidden_layers": 12,
24
+ "output_hidden_states": true,
25
+ "pad_token_id": 0,
26
+ "position_embedding_type": "absolute",
27
+ "torch_dtype": "float32",
28
+ "transformers_version": "4.36.2",
29
+ "type_vocab_size": 2,
30
+ "use_cache": false,
31
+ "vocab_size": 30522
32
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/config_sentence_transformers.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "2.2.2",
4
+ "transformers": "4.28.1",
5
+ "pytorch": "1.13.0+cu117"
6
+ }
7
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1e683966ad5a3828fc3d285c373f902ed0d6783112d413d4650645353851b1f
3
+ size 133462128
snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": true
4
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_01_khoa-klaytn_bge-small-en-v1.5-angle/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/1_Pooling/config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false
7
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/README.md ADDED
@@ -0,0 +1,2702 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - mteb
4
+ - sentence-similarity
5
+ - sentence-transformers
6
+ - Sentence Transformers
7
+ model-index:
8
+ - name: gte-small
9
+ results:
10
+ - task:
11
+ type: Classification
12
+ dataset:
13
+ type: mteb/amazon_counterfactual
14
+ name: MTEB AmazonCounterfactualClassification (en)
15
+ config: en
16
+ split: test
17
+ revision: e8379541af4e31359cca9fbcf4b00f2671dba205
18
+ metrics:
19
+ - type: accuracy
20
+ value: 73.22388059701493
21
+ - type: ap
22
+ value: 36.09895941426988
23
+ - type: f1
24
+ value: 67.3205651539195
25
+ - task:
26
+ type: Classification
27
+ dataset:
28
+ type: mteb/amazon_polarity
29
+ name: MTEB AmazonPolarityClassification
30
+ config: default
31
+ split: test
32
+ revision: e2d317d38cd51312af73b3d32a06d1a08b442046
33
+ metrics:
34
+ - type: accuracy
35
+ value: 91.81894999999999
36
+ - type: ap
37
+ value: 88.5240138417305
38
+ - type: f1
39
+ value: 91.80367382706962
40
+ - task:
41
+ type: Classification
42
+ dataset:
43
+ type: mteb/amazon_reviews_multi
44
+ name: MTEB AmazonReviewsClassification (en)
45
+ config: en
46
+ split: test
47
+ revision: 1399c76144fd37290681b995c656ef9b2e06e26d
48
+ metrics:
49
+ - type: accuracy
50
+ value: 48.032
51
+ - type: f1
52
+ value: 47.4490665674719
53
+ - task:
54
+ type: Retrieval
55
+ dataset:
56
+ type: arguana
57
+ name: MTEB ArguAna
58
+ config: default
59
+ split: test
60
+ revision: None
61
+ metrics:
62
+ - type: map_at_1
63
+ value: 30.725
64
+ - type: map_at_10
65
+ value: 46.604
66
+ - type: map_at_100
67
+ value: 47.535
68
+ - type: map_at_1000
69
+ value: 47.538000000000004
70
+ - type: map_at_3
71
+ value: 41.833
72
+ - type: map_at_5
73
+ value: 44.61
74
+ - type: mrr_at_1
75
+ value: 31.223
76
+ - type: mrr_at_10
77
+ value: 46.794000000000004
78
+ - type: mrr_at_100
79
+ value: 47.725
80
+ - type: mrr_at_1000
81
+ value: 47.727000000000004
82
+ - type: mrr_at_3
83
+ value: 42.07
84
+ - type: mrr_at_5
85
+ value: 44.812000000000005
86
+ - type: ndcg_at_1
87
+ value: 30.725
88
+ - type: ndcg_at_10
89
+ value: 55.440999999999995
90
+ - type: ndcg_at_100
91
+ value: 59.134
92
+ - type: ndcg_at_1000
93
+ value: 59.199
94
+ - type: ndcg_at_3
95
+ value: 45.599000000000004
96
+ - type: ndcg_at_5
97
+ value: 50.637
98
+ - type: precision_at_1
99
+ value: 30.725
100
+ - type: precision_at_10
101
+ value: 8.364
102
+ - type: precision_at_100
103
+ value: 0.991
104
+ - type: precision_at_1000
105
+ value: 0.1
106
+ - type: precision_at_3
107
+ value: 18.848000000000003
108
+ - type: precision_at_5
109
+ value: 13.77
110
+ - type: recall_at_1
111
+ value: 30.725
112
+ - type: recall_at_10
113
+ value: 83.64200000000001
114
+ - type: recall_at_100
115
+ value: 99.14699999999999
116
+ - type: recall_at_1000
117
+ value: 99.644
118
+ - type: recall_at_3
119
+ value: 56.543
120
+ - type: recall_at_5
121
+ value: 68.848
122
+ - task:
123
+ type: Clustering
124
+ dataset:
125
+ type: mteb/arxiv-clustering-p2p
126
+ name: MTEB ArxivClusteringP2P
127
+ config: default
128
+ split: test
129
+ revision: a122ad7f3f0291bf49cc6f4d32aa80929df69d5d
130
+ metrics:
131
+ - type: v_measure
132
+ value: 47.90178078197678
133
+ - task:
134
+ type: Clustering
135
+ dataset:
136
+ type: mteb/arxiv-clustering-s2s
137
+ name: MTEB ArxivClusteringS2S
138
+ config: default
139
+ split: test
140
+ revision: f910caf1a6075f7329cdf8c1a6135696f37dbd53
141
+ metrics:
142
+ - type: v_measure
143
+ value: 40.25728393431922
144
+ - task:
145
+ type: Reranking
146
+ dataset:
147
+ type: mteb/askubuntudupquestions-reranking
148
+ name: MTEB AskUbuntuDupQuestions
149
+ config: default
150
+ split: test
151
+ revision: 2000358ca161889fa9c082cb41daa8dcfb161a54
152
+ metrics:
153
+ - type: map
154
+ value: 61.720297062897764
155
+ - type: mrr
156
+ value: 75.24139295607439
157
+ - task:
158
+ type: STS
159
+ dataset:
160
+ type: mteb/biosses-sts
161
+ name: MTEB BIOSSES
162
+ config: default
163
+ split: test
164
+ revision: d3fb88f8f02e40887cd149695127462bbcf29b4a
165
+ metrics:
166
+ - type: cos_sim_pearson
167
+ value: 89.43527309184616
168
+ - type: cos_sim_spearman
169
+ value: 88.17128615100206
170
+ - type: euclidean_pearson
171
+ value: 87.89922623089282
172
+ - type: euclidean_spearman
173
+ value: 87.96104039655451
174
+ - type: manhattan_pearson
175
+ value: 87.9818290932077
176
+ - type: manhattan_spearman
177
+ value: 88.00923426576885
178
+ - task:
179
+ type: Classification
180
+ dataset:
181
+ type: mteb/banking77
182
+ name: MTEB Banking77Classification
183
+ config: default
184
+ split: test
185
+ revision: 0fd18e25b25c072e09e0d92ab615fda904d66300
186
+ metrics:
187
+ - type: accuracy
188
+ value: 84.0844155844156
189
+ - type: f1
190
+ value: 84.01485017302213
191
+ - task:
192
+ type: Clustering
193
+ dataset:
194
+ type: mteb/biorxiv-clustering-p2p
195
+ name: MTEB BiorxivClusteringP2P
196
+ config: default
197
+ split: test
198
+ revision: 65b79d1d13f80053f67aca9498d9402c2d9f1f40
199
+ metrics:
200
+ - type: v_measure
201
+ value: 38.36574769259432
202
+ - task:
203
+ type: Clustering
204
+ dataset:
205
+ type: mteb/biorxiv-clustering-s2s
206
+ name: MTEB BiorxivClusteringS2S
207
+ config: default
208
+ split: test
209
+ revision: 258694dd0231531bc1fd9de6ceb52a0853c6d908
210
+ metrics:
211
+ - type: v_measure
212
+ value: 35.4857033165287
213
+ - task:
214
+ type: Retrieval
215
+ dataset:
216
+ type: BeIR/cqadupstack
217
+ name: MTEB CQADupstackAndroidRetrieval
218
+ config: default
219
+ split: test
220
+ revision: None
221
+ metrics:
222
+ - type: map_at_1
223
+ value: 30.261
224
+ - type: map_at_10
225
+ value: 42.419000000000004
226
+ - type: map_at_100
227
+ value: 43.927
228
+ - type: map_at_1000
229
+ value: 44.055
230
+ - type: map_at_3
231
+ value: 38.597
232
+ - type: map_at_5
233
+ value: 40.701
234
+ - type: mrr_at_1
235
+ value: 36.91
236
+ - type: mrr_at_10
237
+ value: 48.02
238
+ - type: mrr_at_100
239
+ value: 48.658
240
+ - type: mrr_at_1000
241
+ value: 48.708
242
+ - type: mrr_at_3
243
+ value: 44.945
244
+ - type: mrr_at_5
245
+ value: 46.705000000000005
246
+ - type: ndcg_at_1
247
+ value: 36.91
248
+ - type: ndcg_at_10
249
+ value: 49.353
250
+ - type: ndcg_at_100
251
+ value: 54.456
252
+ - type: ndcg_at_1000
253
+ value: 56.363
254
+ - type: ndcg_at_3
255
+ value: 43.483
256
+ - type: ndcg_at_5
257
+ value: 46.150999999999996
258
+ - type: precision_at_1
259
+ value: 36.91
260
+ - type: precision_at_10
261
+ value: 9.700000000000001
262
+ - type: precision_at_100
263
+ value: 1.557
264
+ - type: precision_at_1000
265
+ value: 0.202
266
+ - type: precision_at_3
267
+ value: 21.078
268
+ - type: precision_at_5
269
+ value: 15.421999999999999
270
+ - type: recall_at_1
271
+ value: 30.261
272
+ - type: recall_at_10
273
+ value: 63.242
274
+ - type: recall_at_100
275
+ value: 84.09100000000001
276
+ - type: recall_at_1000
277
+ value: 96.143
278
+ - type: recall_at_3
279
+ value: 46.478
280
+ - type: recall_at_5
281
+ value: 53.708
282
+ - task:
283
+ type: Retrieval
284
+ dataset:
285
+ type: BeIR/cqadupstack
286
+ name: MTEB CQADupstackEnglishRetrieval
287
+ config: default
288
+ split: test
289
+ revision: None
290
+ metrics:
291
+ - type: map_at_1
292
+ value: 31.145
293
+ - type: map_at_10
294
+ value: 40.996
295
+ - type: map_at_100
296
+ value: 42.266999999999996
297
+ - type: map_at_1000
298
+ value: 42.397
299
+ - type: map_at_3
300
+ value: 38.005
301
+ - type: map_at_5
302
+ value: 39.628
303
+ - type: mrr_at_1
304
+ value: 38.344
305
+ - type: mrr_at_10
306
+ value: 46.827000000000005
307
+ - type: mrr_at_100
308
+ value: 47.446
309
+ - type: mrr_at_1000
310
+ value: 47.489
311
+ - type: mrr_at_3
312
+ value: 44.448
313
+ - type: mrr_at_5
314
+ value: 45.747
315
+ - type: ndcg_at_1
316
+ value: 38.344
317
+ - type: ndcg_at_10
318
+ value: 46.733000000000004
319
+ - type: ndcg_at_100
320
+ value: 51.103
321
+ - type: ndcg_at_1000
322
+ value: 53.075
323
+ - type: ndcg_at_3
324
+ value: 42.366
325
+ - type: ndcg_at_5
326
+ value: 44.242
327
+ - type: precision_at_1
328
+ value: 38.344
329
+ - type: precision_at_10
330
+ value: 8.822000000000001
331
+ - type: precision_at_100
332
+ value: 1.417
333
+ - type: precision_at_1000
334
+ value: 0.187
335
+ - type: precision_at_3
336
+ value: 20.403
337
+ - type: precision_at_5
338
+ value: 14.306
339
+ - type: recall_at_1
340
+ value: 31.145
341
+ - type: recall_at_10
342
+ value: 56.909
343
+ - type: recall_at_100
344
+ value: 75.274
345
+ - type: recall_at_1000
346
+ value: 87.629
347
+ - type: recall_at_3
348
+ value: 43.784
349
+ - type: recall_at_5
350
+ value: 49.338
351
+ - task:
352
+ type: Retrieval
353
+ dataset:
354
+ type: BeIR/cqadupstack
355
+ name: MTEB CQADupstackGamingRetrieval
356
+ config: default
357
+ split: test
358
+ revision: None
359
+ metrics:
360
+ - type: map_at_1
361
+ value: 38.83
362
+ - type: map_at_10
363
+ value: 51.553000000000004
364
+ - type: map_at_100
365
+ value: 52.581
366
+ - type: map_at_1000
367
+ value: 52.638
368
+ - type: map_at_3
369
+ value: 48.112
370
+ - type: map_at_5
371
+ value: 50.095
372
+ - type: mrr_at_1
373
+ value: 44.513999999999996
374
+ - type: mrr_at_10
375
+ value: 54.998000000000005
376
+ - type: mrr_at_100
377
+ value: 55.650999999999996
378
+ - type: mrr_at_1000
379
+ value: 55.679
380
+ - type: mrr_at_3
381
+ value: 52.602000000000004
382
+ - type: mrr_at_5
383
+ value: 53.931
384
+ - type: ndcg_at_1
385
+ value: 44.513999999999996
386
+ - type: ndcg_at_10
387
+ value: 57.67400000000001
388
+ - type: ndcg_at_100
389
+ value: 61.663999999999994
390
+ - type: ndcg_at_1000
391
+ value: 62.743
392
+ - type: ndcg_at_3
393
+ value: 51.964
394
+ - type: ndcg_at_5
395
+ value: 54.773
396
+ - type: precision_at_1
397
+ value: 44.513999999999996
398
+ - type: precision_at_10
399
+ value: 9.423
400
+ - type: precision_at_100
401
+ value: 1.2309999999999999
402
+ - type: precision_at_1000
403
+ value: 0.13699999999999998
404
+ - type: precision_at_3
405
+ value: 23.323
406
+ - type: precision_at_5
407
+ value: 16.163
408
+ - type: recall_at_1
409
+ value: 38.83
410
+ - type: recall_at_10
411
+ value: 72.327
412
+ - type: recall_at_100
413
+ value: 89.519
414
+ - type: recall_at_1000
415
+ value: 97.041
416
+ - type: recall_at_3
417
+ value: 57.206
418
+ - type: recall_at_5
419
+ value: 63.88399999999999
420
+ - task:
421
+ type: Retrieval
422
+ dataset:
423
+ type: BeIR/cqadupstack
424
+ name: MTEB CQADupstackGisRetrieval
425
+ config: default
426
+ split: test
427
+ revision: None
428
+ metrics:
429
+ - type: map_at_1
430
+ value: 25.484
431
+ - type: map_at_10
432
+ value: 34.527
433
+ - type: map_at_100
434
+ value: 35.661
435
+ - type: map_at_1000
436
+ value: 35.739
437
+ - type: map_at_3
438
+ value: 32.199
439
+ - type: map_at_5
440
+ value: 33.632
441
+ - type: mrr_at_1
442
+ value: 27.458
443
+ - type: mrr_at_10
444
+ value: 36.543
445
+ - type: mrr_at_100
446
+ value: 37.482
447
+ - type: mrr_at_1000
448
+ value: 37.543
449
+ - type: mrr_at_3
450
+ value: 34.256
451
+ - type: mrr_at_5
452
+ value: 35.618
453
+ - type: ndcg_at_1
454
+ value: 27.458
455
+ - type: ndcg_at_10
456
+ value: 39.396
457
+ - type: ndcg_at_100
458
+ value: 44.742
459
+ - type: ndcg_at_1000
460
+ value: 46.708
461
+ - type: ndcg_at_3
462
+ value: 34.817
463
+ - type: ndcg_at_5
464
+ value: 37.247
465
+ - type: precision_at_1
466
+ value: 27.458
467
+ - type: precision_at_10
468
+ value: 5.976999999999999
469
+ - type: precision_at_100
470
+ value: 0.907
471
+ - type: precision_at_1000
472
+ value: 0.11100000000000002
473
+ - type: precision_at_3
474
+ value: 14.878
475
+ - type: precision_at_5
476
+ value: 10.35
477
+ - type: recall_at_1
478
+ value: 25.484
479
+ - type: recall_at_10
480
+ value: 52.317
481
+ - type: recall_at_100
482
+ value: 76.701
483
+ - type: recall_at_1000
484
+ value: 91.408
485
+ - type: recall_at_3
486
+ value: 40.043
487
+ - type: recall_at_5
488
+ value: 45.879
489
+ - task:
490
+ type: Retrieval
491
+ dataset:
492
+ type: BeIR/cqadupstack
493
+ name: MTEB CQADupstackMathematicaRetrieval
494
+ config: default
495
+ split: test
496
+ revision: None
497
+ metrics:
498
+ - type: map_at_1
499
+ value: 16.719
500
+ - type: map_at_10
501
+ value: 25.269000000000002
502
+ - type: map_at_100
503
+ value: 26.442
504
+ - type: map_at_1000
505
+ value: 26.557
506
+ - type: map_at_3
507
+ value: 22.56
508
+ - type: map_at_5
509
+ value: 24.082
510
+ - type: mrr_at_1
511
+ value: 20.896
512
+ - type: mrr_at_10
513
+ value: 29.982999999999997
514
+ - type: mrr_at_100
515
+ value: 30.895
516
+ - type: mrr_at_1000
517
+ value: 30.961
518
+ - type: mrr_at_3
519
+ value: 27.239
520
+ - type: mrr_at_5
521
+ value: 28.787000000000003
522
+ - type: ndcg_at_1
523
+ value: 20.896
524
+ - type: ndcg_at_10
525
+ value: 30.814000000000004
526
+ - type: ndcg_at_100
527
+ value: 36.418
528
+ - type: ndcg_at_1000
529
+ value: 39.182
530
+ - type: ndcg_at_3
531
+ value: 25.807999999999996
532
+ - type: ndcg_at_5
533
+ value: 28.143
534
+ - type: precision_at_1
535
+ value: 20.896
536
+ - type: precision_at_10
537
+ value: 5.821
538
+ - type: precision_at_100
539
+ value: 0.991
540
+ - type: precision_at_1000
541
+ value: 0.136
542
+ - type: precision_at_3
543
+ value: 12.562000000000001
544
+ - type: precision_at_5
545
+ value: 9.254
546
+ - type: recall_at_1
547
+ value: 16.719
548
+ - type: recall_at_10
549
+ value: 43.155
550
+ - type: recall_at_100
551
+ value: 67.831
552
+ - type: recall_at_1000
553
+ value: 87.617
554
+ - type: recall_at_3
555
+ value: 29.259
556
+ - type: recall_at_5
557
+ value: 35.260999999999996
558
+ - task:
559
+ type: Retrieval
560
+ dataset:
561
+ type: BeIR/cqadupstack
562
+ name: MTEB CQADupstackPhysicsRetrieval
563
+ config: default
564
+ split: test
565
+ revision: None
566
+ metrics:
567
+ - type: map_at_1
568
+ value: 29.398999999999997
569
+ - type: map_at_10
570
+ value: 39.876
571
+ - type: map_at_100
572
+ value: 41.205999999999996
573
+ - type: map_at_1000
574
+ value: 41.321999999999996
575
+ - type: map_at_3
576
+ value: 36.588
577
+ - type: map_at_5
578
+ value: 38.538
579
+ - type: mrr_at_1
580
+ value: 35.9
581
+ - type: mrr_at_10
582
+ value: 45.528
583
+ - type: mrr_at_100
584
+ value: 46.343
585
+ - type: mrr_at_1000
586
+ value: 46.388
587
+ - type: mrr_at_3
588
+ value: 42.862
589
+ - type: mrr_at_5
590
+ value: 44.440000000000005
591
+ - type: ndcg_at_1
592
+ value: 35.9
593
+ - type: ndcg_at_10
594
+ value: 45.987
595
+ - type: ndcg_at_100
596
+ value: 51.370000000000005
597
+ - type: ndcg_at_1000
598
+ value: 53.400000000000006
599
+ - type: ndcg_at_3
600
+ value: 40.841
601
+ - type: ndcg_at_5
602
+ value: 43.447
603
+ - type: precision_at_1
604
+ value: 35.9
605
+ - type: precision_at_10
606
+ value: 8.393
607
+ - type: precision_at_100
608
+ value: 1.283
609
+ - type: precision_at_1000
610
+ value: 0.166
611
+ - type: precision_at_3
612
+ value: 19.538
613
+ - type: precision_at_5
614
+ value: 13.975000000000001
615
+ - type: recall_at_1
616
+ value: 29.398999999999997
617
+ - type: recall_at_10
618
+ value: 58.361
619
+ - type: recall_at_100
620
+ value: 81.081
621
+ - type: recall_at_1000
622
+ value: 94.004
623
+ - type: recall_at_3
624
+ value: 43.657000000000004
625
+ - type: recall_at_5
626
+ value: 50.519999999999996
627
+ - task:
628
+ type: Retrieval
629
+ dataset:
630
+ type: BeIR/cqadupstack
631
+ name: MTEB CQADupstackProgrammersRetrieval
632
+ config: default
633
+ split: test
634
+ revision: None
635
+ metrics:
636
+ - type: map_at_1
637
+ value: 21.589
638
+ - type: map_at_10
639
+ value: 31.608999999999998
640
+ - type: map_at_100
641
+ value: 33.128
642
+ - type: map_at_1000
643
+ value: 33.247
644
+ - type: map_at_3
645
+ value: 28.671999999999997
646
+ - type: map_at_5
647
+ value: 30.233999999999998
648
+ - type: mrr_at_1
649
+ value: 26.712000000000003
650
+ - type: mrr_at_10
651
+ value: 36.713
652
+ - type: mrr_at_100
653
+ value: 37.713
654
+ - type: mrr_at_1000
655
+ value: 37.771
656
+ - type: mrr_at_3
657
+ value: 34.075
658
+ - type: mrr_at_5
659
+ value: 35.451
660
+ - type: ndcg_at_1
661
+ value: 26.712000000000003
662
+ - type: ndcg_at_10
663
+ value: 37.519999999999996
664
+ - type: ndcg_at_100
665
+ value: 43.946000000000005
666
+ - type: ndcg_at_1000
667
+ value: 46.297
668
+ - type: ndcg_at_3
669
+ value: 32.551
670
+ - type: ndcg_at_5
671
+ value: 34.660999999999994
672
+ - type: precision_at_1
673
+ value: 26.712000000000003
674
+ - type: precision_at_10
675
+ value: 7.066
676
+ - type: precision_at_100
677
+ value: 1.216
678
+ - type: precision_at_1000
679
+ value: 0.157
680
+ - type: precision_at_3
681
+ value: 15.906
682
+ - type: precision_at_5
683
+ value: 11.437999999999999
684
+ - type: recall_at_1
685
+ value: 21.589
686
+ - type: recall_at_10
687
+ value: 50.090999999999994
688
+ - type: recall_at_100
689
+ value: 77.43900000000001
690
+ - type: recall_at_1000
691
+ value: 93.35900000000001
692
+ - type: recall_at_3
693
+ value: 36.028999999999996
694
+ - type: recall_at_5
695
+ value: 41.698
696
+ - task:
697
+ type: Retrieval
698
+ dataset:
699
+ type: BeIR/cqadupstack
700
+ name: MTEB CQADupstackRetrieval
701
+ config: default
702
+ split: test
703
+ revision: None
704
+ metrics:
705
+ - type: map_at_1
706
+ value: 25.121666666666663
707
+ - type: map_at_10
708
+ value: 34.46258333333334
709
+ - type: map_at_100
710
+ value: 35.710499999999996
711
+ - type: map_at_1000
712
+ value: 35.82691666666666
713
+ - type: map_at_3
714
+ value: 31.563249999999996
715
+ - type: map_at_5
716
+ value: 33.189750000000004
717
+ - type: mrr_at_1
718
+ value: 29.66441666666667
719
+ - type: mrr_at_10
720
+ value: 38.5455
721
+ - type: mrr_at_100
722
+ value: 39.39566666666667
723
+ - type: mrr_at_1000
724
+ value: 39.45325
725
+ - type: mrr_at_3
726
+ value: 36.003333333333345
727
+ - type: mrr_at_5
728
+ value: 37.440916666666666
729
+ - type: ndcg_at_1
730
+ value: 29.66441666666667
731
+ - type: ndcg_at_10
732
+ value: 39.978416666666675
733
+ - type: ndcg_at_100
734
+ value: 45.278666666666666
735
+ - type: ndcg_at_1000
736
+ value: 47.52275
737
+ - type: ndcg_at_3
738
+ value: 35.00058333333334
739
+ - type: ndcg_at_5
740
+ value: 37.34908333333333
741
+ - type: precision_at_1
742
+ value: 29.66441666666667
743
+ - type: precision_at_10
744
+ value: 7.094500000000001
745
+ - type: precision_at_100
746
+ value: 1.1523333333333332
747
+ - type: precision_at_1000
748
+ value: 0.15358333333333332
749
+ - type: precision_at_3
750
+ value: 16.184166666666663
751
+ - type: precision_at_5
752
+ value: 11.6005
753
+ - type: recall_at_1
754
+ value: 25.121666666666663
755
+ - type: recall_at_10
756
+ value: 52.23975000000001
757
+ - type: recall_at_100
758
+ value: 75.48408333333333
759
+ - type: recall_at_1000
760
+ value: 90.95316666666668
761
+ - type: recall_at_3
762
+ value: 38.38458333333333
763
+ - type: recall_at_5
764
+ value: 44.39933333333333
765
+ - task:
766
+ type: Retrieval
767
+ dataset:
768
+ type: BeIR/cqadupstack
769
+ name: MTEB CQADupstackStatsRetrieval
770
+ config: default
771
+ split: test
772
+ revision: None
773
+ metrics:
774
+ - type: map_at_1
775
+ value: 23.569000000000003
776
+ - type: map_at_10
777
+ value: 30.389
778
+ - type: map_at_100
779
+ value: 31.396
780
+ - type: map_at_1000
781
+ value: 31.493
782
+ - type: map_at_3
783
+ value: 28.276
784
+ - type: map_at_5
785
+ value: 29.459000000000003
786
+ - type: mrr_at_1
787
+ value: 26.534000000000002
788
+ - type: mrr_at_10
789
+ value: 33.217999999999996
790
+ - type: mrr_at_100
791
+ value: 34.054
792
+ - type: mrr_at_1000
793
+ value: 34.12
794
+ - type: mrr_at_3
795
+ value: 31.058000000000003
796
+ - type: mrr_at_5
797
+ value: 32.330999999999996
798
+ - type: ndcg_at_1
799
+ value: 26.534000000000002
800
+ - type: ndcg_at_10
801
+ value: 34.608
802
+ - type: ndcg_at_100
803
+ value: 39.391999999999996
804
+ - type: ndcg_at_1000
805
+ value: 41.837999999999994
806
+ - type: ndcg_at_3
807
+ value: 30.564999999999998
808
+ - type: ndcg_at_5
809
+ value: 32.509
810
+ - type: precision_at_1
811
+ value: 26.534000000000002
812
+ - type: precision_at_10
813
+ value: 5.414
814
+ - type: precision_at_100
815
+ value: 0.847
816
+ - type: precision_at_1000
817
+ value: 0.11399999999999999
818
+ - type: precision_at_3
819
+ value: 12.986
820
+ - type: precision_at_5
821
+ value: 9.202
822
+ - type: recall_at_1
823
+ value: 23.569000000000003
824
+ - type: recall_at_10
825
+ value: 44.896
826
+ - type: recall_at_100
827
+ value: 66.476
828
+ - type: recall_at_1000
829
+ value: 84.548
830
+ - type: recall_at_3
831
+ value: 33.79
832
+ - type: recall_at_5
833
+ value: 38.512
834
+ - task:
835
+ type: Retrieval
836
+ dataset:
837
+ type: BeIR/cqadupstack
838
+ name: MTEB CQADupstackTexRetrieval
839
+ config: default
840
+ split: test
841
+ revision: None
842
+ metrics:
843
+ - type: map_at_1
844
+ value: 16.36
845
+ - type: map_at_10
846
+ value: 23.57
847
+ - type: map_at_100
848
+ value: 24.698999999999998
849
+ - type: map_at_1000
850
+ value: 24.834999999999997
851
+ - type: map_at_3
852
+ value: 21.093
853
+ - type: map_at_5
854
+ value: 22.418
855
+ - type: mrr_at_1
856
+ value: 19.718
857
+ - type: mrr_at_10
858
+ value: 27.139999999999997
859
+ - type: mrr_at_100
860
+ value: 28.097
861
+ - type: mrr_at_1000
862
+ value: 28.177999999999997
863
+ - type: mrr_at_3
864
+ value: 24.805
865
+ - type: mrr_at_5
866
+ value: 26.121
867
+ - type: ndcg_at_1
868
+ value: 19.718
869
+ - type: ndcg_at_10
870
+ value: 28.238999999999997
871
+ - type: ndcg_at_100
872
+ value: 33.663
873
+ - type: ndcg_at_1000
874
+ value: 36.763
875
+ - type: ndcg_at_3
876
+ value: 23.747
877
+ - type: ndcg_at_5
878
+ value: 25.796000000000003
879
+ - type: precision_at_1
880
+ value: 19.718
881
+ - type: precision_at_10
882
+ value: 5.282
883
+ - type: precision_at_100
884
+ value: 0.9390000000000001
885
+ - type: precision_at_1000
886
+ value: 0.13899999999999998
887
+ - type: precision_at_3
888
+ value: 11.264000000000001
889
+ - type: precision_at_5
890
+ value: 8.341
891
+ - type: recall_at_1
892
+ value: 16.36
893
+ - type: recall_at_10
894
+ value: 38.669
895
+ - type: recall_at_100
896
+ value: 63.184
897
+ - type: recall_at_1000
898
+ value: 85.33800000000001
899
+ - type: recall_at_3
900
+ value: 26.214
901
+ - type: recall_at_5
902
+ value: 31.423000000000002
903
+ - task:
904
+ type: Retrieval
905
+ dataset:
906
+ type: BeIR/cqadupstack
907
+ name: MTEB CQADupstackUnixRetrieval
908
+ config: default
909
+ split: test
910
+ revision: None
911
+ metrics:
912
+ - type: map_at_1
913
+ value: 25.618999999999996
914
+ - type: map_at_10
915
+ value: 34.361999999999995
916
+ - type: map_at_100
917
+ value: 35.534
918
+ - type: map_at_1000
919
+ value: 35.634
920
+ - type: map_at_3
921
+ value: 31.402
922
+ - type: map_at_5
923
+ value: 32.815
924
+ - type: mrr_at_1
925
+ value: 30.037000000000003
926
+ - type: mrr_at_10
927
+ value: 38.284
928
+ - type: mrr_at_100
929
+ value: 39.141999999999996
930
+ - type: mrr_at_1000
931
+ value: 39.2
932
+ - type: mrr_at_3
933
+ value: 35.603
934
+ - type: mrr_at_5
935
+ value: 36.867
936
+ - type: ndcg_at_1
937
+ value: 30.037000000000003
938
+ - type: ndcg_at_10
939
+ value: 39.87
940
+ - type: ndcg_at_100
941
+ value: 45.243
942
+ - type: ndcg_at_1000
943
+ value: 47.507
944
+ - type: ndcg_at_3
945
+ value: 34.371
946
+ - type: ndcg_at_5
947
+ value: 36.521
948
+ - type: precision_at_1
949
+ value: 30.037000000000003
950
+ - type: precision_at_10
951
+ value: 6.819
952
+ - type: precision_at_100
953
+ value: 1.0699999999999998
954
+ - type: precision_at_1000
955
+ value: 0.13699999999999998
956
+ - type: precision_at_3
957
+ value: 15.392
958
+ - type: precision_at_5
959
+ value: 10.821
960
+ - type: recall_at_1
961
+ value: 25.618999999999996
962
+ - type: recall_at_10
963
+ value: 52.869
964
+ - type: recall_at_100
965
+ value: 76.395
966
+ - type: recall_at_1000
967
+ value: 92.19500000000001
968
+ - type: recall_at_3
969
+ value: 37.943
970
+ - type: recall_at_5
971
+ value: 43.342999999999996
972
+ - task:
973
+ type: Retrieval
974
+ dataset:
975
+ type: BeIR/cqadupstack
976
+ name: MTEB CQADupstackWebmastersRetrieval
977
+ config: default
978
+ split: test
979
+ revision: None
980
+ metrics:
981
+ - type: map_at_1
982
+ value: 23.283
983
+ - type: map_at_10
984
+ value: 32.155
985
+ - type: map_at_100
986
+ value: 33.724
987
+ - type: map_at_1000
988
+ value: 33.939
989
+ - type: map_at_3
990
+ value: 29.018
991
+ - type: map_at_5
992
+ value: 30.864000000000004
993
+ - type: mrr_at_1
994
+ value: 28.063
995
+ - type: mrr_at_10
996
+ value: 36.632
997
+ - type: mrr_at_100
998
+ value: 37.606
999
+ - type: mrr_at_1000
1000
+ value: 37.671
1001
+ - type: mrr_at_3
1002
+ value: 33.992
1003
+ - type: mrr_at_5
1004
+ value: 35.613
1005
+ - type: ndcg_at_1
1006
+ value: 28.063
1007
+ - type: ndcg_at_10
1008
+ value: 38.024
1009
+ - type: ndcg_at_100
1010
+ value: 44.292
1011
+ - type: ndcg_at_1000
1012
+ value: 46.818
1013
+ - type: ndcg_at_3
1014
+ value: 32.965
1015
+ - type: ndcg_at_5
1016
+ value: 35.562
1017
+ - type: precision_at_1
1018
+ value: 28.063
1019
+ - type: precision_at_10
1020
+ value: 7.352
1021
+ - type: precision_at_100
1022
+ value: 1.514
1023
+ - type: precision_at_1000
1024
+ value: 0.23800000000000002
1025
+ - type: precision_at_3
1026
+ value: 15.481
1027
+ - type: precision_at_5
1028
+ value: 11.542
1029
+ - type: recall_at_1
1030
+ value: 23.283
1031
+ - type: recall_at_10
1032
+ value: 49.756
1033
+ - type: recall_at_100
1034
+ value: 78.05
1035
+ - type: recall_at_1000
1036
+ value: 93.854
1037
+ - type: recall_at_3
1038
+ value: 35.408
1039
+ - type: recall_at_5
1040
+ value: 42.187000000000005
1041
+ - task:
1042
+ type: Retrieval
1043
+ dataset:
1044
+ type: BeIR/cqadupstack
1045
+ name: MTEB CQADupstackWordpressRetrieval
1046
+ config: default
1047
+ split: test
1048
+ revision: None
1049
+ metrics:
1050
+ - type: map_at_1
1051
+ value: 19.201999999999998
1052
+ - type: map_at_10
1053
+ value: 26.826
1054
+ - type: map_at_100
1055
+ value: 27.961000000000002
1056
+ - type: map_at_1000
1057
+ value: 28.066999999999997
1058
+ - type: map_at_3
1059
+ value: 24.237000000000002
1060
+ - type: map_at_5
1061
+ value: 25.811
1062
+ - type: mrr_at_1
1063
+ value: 20.887
1064
+ - type: mrr_at_10
1065
+ value: 28.660000000000004
1066
+ - type: mrr_at_100
1067
+ value: 29.660999999999998
1068
+ - type: mrr_at_1000
1069
+ value: 29.731
1070
+ - type: mrr_at_3
1071
+ value: 26.155
1072
+ - type: mrr_at_5
1073
+ value: 27.68
1074
+ - type: ndcg_at_1
1075
+ value: 20.887
1076
+ - type: ndcg_at_10
1077
+ value: 31.523
1078
+ - type: ndcg_at_100
1079
+ value: 37.055
1080
+ - type: ndcg_at_1000
1081
+ value: 39.579
1082
+ - type: ndcg_at_3
1083
+ value: 26.529000000000003
1084
+ - type: ndcg_at_5
1085
+ value: 29.137
1086
+ - type: precision_at_1
1087
+ value: 20.887
1088
+ - type: precision_at_10
1089
+ value: 5.065
1090
+ - type: precision_at_100
1091
+ value: 0.856
1092
+ - type: precision_at_1000
1093
+ value: 0.11900000000000001
1094
+ - type: precision_at_3
1095
+ value: 11.399
1096
+ - type: precision_at_5
1097
+ value: 8.392
1098
+ - type: recall_at_1
1099
+ value: 19.201999999999998
1100
+ - type: recall_at_10
1101
+ value: 44.285000000000004
1102
+ - type: recall_at_100
1103
+ value: 69.768
1104
+ - type: recall_at_1000
1105
+ value: 88.302
1106
+ - type: recall_at_3
1107
+ value: 30.804
1108
+ - type: recall_at_5
1109
+ value: 37.039
1110
+ - task:
1111
+ type: Retrieval
1112
+ dataset:
1113
+ type: climate-fever
1114
+ name: MTEB ClimateFEVER
1115
+ config: default
1116
+ split: test
1117
+ revision: None
1118
+ metrics:
1119
+ - type: map_at_1
1120
+ value: 11.244
1121
+ - type: map_at_10
1122
+ value: 18.956
1123
+ - type: map_at_100
1124
+ value: 20.674
1125
+ - type: map_at_1000
1126
+ value: 20.863
1127
+ - type: map_at_3
1128
+ value: 15.923000000000002
1129
+ - type: map_at_5
1130
+ value: 17.518
1131
+ - type: mrr_at_1
1132
+ value: 25.080999999999996
1133
+ - type: mrr_at_10
1134
+ value: 35.94
1135
+ - type: mrr_at_100
1136
+ value: 36.969
1137
+ - type: mrr_at_1000
1138
+ value: 37.013
1139
+ - type: mrr_at_3
1140
+ value: 32.617000000000004
1141
+ - type: mrr_at_5
1142
+ value: 34.682
1143
+ - type: ndcg_at_1
1144
+ value: 25.080999999999996
1145
+ - type: ndcg_at_10
1146
+ value: 26.539
1147
+ - type: ndcg_at_100
1148
+ value: 33.601
1149
+ - type: ndcg_at_1000
1150
+ value: 37.203
1151
+ - type: ndcg_at_3
1152
+ value: 21.695999999999998
1153
+ - type: ndcg_at_5
1154
+ value: 23.567
1155
+ - type: precision_at_1
1156
+ value: 25.080999999999996
1157
+ - type: precision_at_10
1158
+ value: 8.143
1159
+ - type: precision_at_100
1160
+ value: 1.5650000000000002
1161
+ - type: precision_at_1000
1162
+ value: 0.22300000000000003
1163
+ - type: precision_at_3
1164
+ value: 15.983
1165
+ - type: precision_at_5
1166
+ value: 12.417
1167
+ - type: recall_at_1
1168
+ value: 11.244
1169
+ - type: recall_at_10
1170
+ value: 31.457
1171
+ - type: recall_at_100
1172
+ value: 55.92
1173
+ - type: recall_at_1000
1174
+ value: 76.372
1175
+ - type: recall_at_3
1176
+ value: 19.784
1177
+ - type: recall_at_5
1178
+ value: 24.857000000000003
1179
+ - task:
1180
+ type: Retrieval
1181
+ dataset:
1182
+ type: dbpedia-entity
1183
+ name: MTEB DBPedia
1184
+ config: default
1185
+ split: test
1186
+ revision: None
1187
+ metrics:
1188
+ - type: map_at_1
1189
+ value: 8.595
1190
+ - type: map_at_10
1191
+ value: 18.75
1192
+ - type: map_at_100
1193
+ value: 26.354
1194
+ - type: map_at_1000
1195
+ value: 27.912
1196
+ - type: map_at_3
1197
+ value: 13.794
1198
+ - type: map_at_5
1199
+ value: 16.021
1200
+ - type: mrr_at_1
1201
+ value: 65.75
1202
+ - type: mrr_at_10
1203
+ value: 73.837
1204
+ - type: mrr_at_100
1205
+ value: 74.22800000000001
1206
+ - type: mrr_at_1000
1207
+ value: 74.234
1208
+ - type: mrr_at_3
1209
+ value: 72.5
1210
+ - type: mrr_at_5
1211
+ value: 73.387
1212
+ - type: ndcg_at_1
1213
+ value: 52.625
1214
+ - type: ndcg_at_10
1215
+ value: 39.101
1216
+ - type: ndcg_at_100
1217
+ value: 43.836000000000006
1218
+ - type: ndcg_at_1000
1219
+ value: 51.086
1220
+ - type: ndcg_at_3
1221
+ value: 44.229
1222
+ - type: ndcg_at_5
1223
+ value: 41.555
1224
+ - type: precision_at_1
1225
+ value: 65.75
1226
+ - type: precision_at_10
1227
+ value: 30.45
1228
+ - type: precision_at_100
1229
+ value: 9.81
1230
+ - type: precision_at_1000
1231
+ value: 2.045
1232
+ - type: precision_at_3
1233
+ value: 48.667
1234
+ - type: precision_at_5
1235
+ value: 40.8
1236
+ - type: recall_at_1
1237
+ value: 8.595
1238
+ - type: recall_at_10
1239
+ value: 24.201
1240
+ - type: recall_at_100
1241
+ value: 50.096
1242
+ - type: recall_at_1000
1243
+ value: 72.677
1244
+ - type: recall_at_3
1245
+ value: 15.212
1246
+ - type: recall_at_5
1247
+ value: 18.745
1248
+ - task:
1249
+ type: Classification
1250
+ dataset:
1251
+ type: mteb/emotion
1252
+ name: MTEB EmotionClassification
1253
+ config: default
1254
+ split: test
1255
+ revision: 4f58c6b202a23cf9a4da393831edf4f9183cad37
1256
+ metrics:
1257
+ - type: accuracy
1258
+ value: 46.565
1259
+ - type: f1
1260
+ value: 41.49914329345582
1261
+ - task:
1262
+ type: Retrieval
1263
+ dataset:
1264
+ type: fever
1265
+ name: MTEB FEVER
1266
+ config: default
1267
+ split: test
1268
+ revision: None
1269
+ metrics:
1270
+ - type: map_at_1
1271
+ value: 66.60000000000001
1272
+ - type: map_at_10
1273
+ value: 76.838
1274
+ - type: map_at_100
1275
+ value: 77.076
1276
+ - type: map_at_1000
1277
+ value: 77.09
1278
+ - type: map_at_3
1279
+ value: 75.545
1280
+ - type: map_at_5
1281
+ value: 76.39
1282
+ - type: mrr_at_1
1283
+ value: 71.707
1284
+ - type: mrr_at_10
1285
+ value: 81.514
1286
+ - type: mrr_at_100
1287
+ value: 81.64099999999999
1288
+ - type: mrr_at_1000
1289
+ value: 81.645
1290
+ - type: mrr_at_3
1291
+ value: 80.428
1292
+ - type: mrr_at_5
1293
+ value: 81.159
1294
+ - type: ndcg_at_1
1295
+ value: 71.707
1296
+ - type: ndcg_at_10
1297
+ value: 81.545
1298
+ - type: ndcg_at_100
1299
+ value: 82.477
1300
+ - type: ndcg_at_1000
1301
+ value: 82.73899999999999
1302
+ - type: ndcg_at_3
1303
+ value: 79.292
1304
+ - type: ndcg_at_5
1305
+ value: 80.599
1306
+ - type: precision_at_1
1307
+ value: 71.707
1308
+ - type: precision_at_10
1309
+ value: 10.035
1310
+ - type: precision_at_100
1311
+ value: 1.068
1312
+ - type: precision_at_1000
1313
+ value: 0.11100000000000002
1314
+ - type: precision_at_3
1315
+ value: 30.918
1316
+ - type: precision_at_5
1317
+ value: 19.328
1318
+ - type: recall_at_1
1319
+ value: 66.60000000000001
1320
+ - type: recall_at_10
1321
+ value: 91.353
1322
+ - type: recall_at_100
1323
+ value: 95.21
1324
+ - type: recall_at_1000
1325
+ value: 96.89999999999999
1326
+ - type: recall_at_3
1327
+ value: 85.188
1328
+ - type: recall_at_5
1329
+ value: 88.52
1330
+ - task:
1331
+ type: Retrieval
1332
+ dataset:
1333
+ type: fiqa
1334
+ name: MTEB FiQA2018
1335
+ config: default
1336
+ split: test
1337
+ revision: None
1338
+ metrics:
1339
+ - type: map_at_1
1340
+ value: 19.338
1341
+ - type: map_at_10
1342
+ value: 31.752000000000002
1343
+ - type: map_at_100
1344
+ value: 33.516
1345
+ - type: map_at_1000
1346
+ value: 33.694
1347
+ - type: map_at_3
1348
+ value: 27.716
1349
+ - type: map_at_5
1350
+ value: 29.67
1351
+ - type: mrr_at_1
1352
+ value: 38.117000000000004
1353
+ - type: mrr_at_10
1354
+ value: 47.323
1355
+ - type: mrr_at_100
1356
+ value: 48.13
1357
+ - type: mrr_at_1000
1358
+ value: 48.161
1359
+ - type: mrr_at_3
1360
+ value: 45.062000000000005
1361
+ - type: mrr_at_5
1362
+ value: 46.358
1363
+ - type: ndcg_at_1
1364
+ value: 38.117000000000004
1365
+ - type: ndcg_at_10
1366
+ value: 39.353
1367
+ - type: ndcg_at_100
1368
+ value: 46.044000000000004
1369
+ - type: ndcg_at_1000
1370
+ value: 49.083
1371
+ - type: ndcg_at_3
1372
+ value: 35.891
1373
+ - type: ndcg_at_5
1374
+ value: 36.661
1375
+ - type: precision_at_1
1376
+ value: 38.117000000000004
1377
+ - type: precision_at_10
1378
+ value: 11.187999999999999
1379
+ - type: precision_at_100
1380
+ value: 1.802
1381
+ - type: precision_at_1000
1382
+ value: 0.234
1383
+ - type: precision_at_3
1384
+ value: 24.126
1385
+ - type: precision_at_5
1386
+ value: 17.562
1387
+ - type: recall_at_1
1388
+ value: 19.338
1389
+ - type: recall_at_10
1390
+ value: 45.735
1391
+ - type: recall_at_100
1392
+ value: 71.281
1393
+ - type: recall_at_1000
1394
+ value: 89.537
1395
+ - type: recall_at_3
1396
+ value: 32.525
1397
+ - type: recall_at_5
1398
+ value: 37.671
1399
+ - task:
1400
+ type: Retrieval
1401
+ dataset:
1402
+ type: hotpotqa
1403
+ name: MTEB HotpotQA
1404
+ config: default
1405
+ split: test
1406
+ revision: None
1407
+ metrics:
1408
+ - type: map_at_1
1409
+ value: 36.995
1410
+ - type: map_at_10
1411
+ value: 55.032000000000004
1412
+ - type: map_at_100
1413
+ value: 55.86
1414
+ - type: map_at_1000
1415
+ value: 55.932
1416
+ - type: map_at_3
1417
+ value: 52.125
1418
+ - type: map_at_5
1419
+ value: 53.884
1420
+ - type: mrr_at_1
1421
+ value: 73.991
1422
+ - type: mrr_at_10
1423
+ value: 80.096
1424
+ - type: mrr_at_100
1425
+ value: 80.32000000000001
1426
+ - type: mrr_at_1000
1427
+ value: 80.331
1428
+ - type: mrr_at_3
1429
+ value: 79.037
1430
+ - type: mrr_at_5
1431
+ value: 79.719
1432
+ - type: ndcg_at_1
1433
+ value: 73.991
1434
+ - type: ndcg_at_10
1435
+ value: 63.786
1436
+ - type: ndcg_at_100
1437
+ value: 66.78
1438
+ - type: ndcg_at_1000
1439
+ value: 68.255
1440
+ - type: ndcg_at_3
1441
+ value: 59.501000000000005
1442
+ - type: ndcg_at_5
1443
+ value: 61.82299999999999
1444
+ - type: precision_at_1
1445
+ value: 73.991
1446
+ - type: precision_at_10
1447
+ value: 13.157
1448
+ - type: precision_at_100
1449
+ value: 1.552
1450
+ - type: precision_at_1000
1451
+ value: 0.17500000000000002
1452
+ - type: precision_at_3
1453
+ value: 37.519999999999996
1454
+ - type: precision_at_5
1455
+ value: 24.351
1456
+ - type: recall_at_1
1457
+ value: 36.995
1458
+ - type: recall_at_10
1459
+ value: 65.78699999999999
1460
+ - type: recall_at_100
1461
+ value: 77.583
1462
+ - type: recall_at_1000
1463
+ value: 87.421
1464
+ - type: recall_at_3
1465
+ value: 56.279999999999994
1466
+ - type: recall_at_5
1467
+ value: 60.878
1468
+ - task:
1469
+ type: Classification
1470
+ dataset:
1471
+ type: mteb/imdb
1472
+ name: MTEB ImdbClassification
1473
+ config: default
1474
+ split: test
1475
+ revision: 3d86128a09e091d6018b6d26cad27f2739fc2db7
1476
+ metrics:
1477
+ - type: accuracy
1478
+ value: 86.80239999999999
1479
+ - type: ap
1480
+ value: 81.97305141128378
1481
+ - type: f1
1482
+ value: 86.76976305549273
1483
+ - task:
1484
+ type: Retrieval
1485
+ dataset:
1486
+ type: msmarco
1487
+ name: MTEB MSMARCO
1488
+ config: default
1489
+ split: dev
1490
+ revision: None
1491
+ metrics:
1492
+ - type: map_at_1
1493
+ value: 21.166
1494
+ - type: map_at_10
1495
+ value: 33.396
1496
+ - type: map_at_100
1497
+ value: 34.588
1498
+ - type: map_at_1000
1499
+ value: 34.637
1500
+ - type: map_at_3
1501
+ value: 29.509999999999998
1502
+ - type: map_at_5
1503
+ value: 31.719
1504
+ - type: mrr_at_1
1505
+ value: 21.762
1506
+ - type: mrr_at_10
1507
+ value: 33.969
1508
+ - type: mrr_at_100
1509
+ value: 35.099000000000004
1510
+ - type: mrr_at_1000
1511
+ value: 35.141
1512
+ - type: mrr_at_3
1513
+ value: 30.148000000000003
1514
+ - type: mrr_at_5
1515
+ value: 32.324000000000005
1516
+ - type: ndcg_at_1
1517
+ value: 21.776999999999997
1518
+ - type: ndcg_at_10
1519
+ value: 40.306999999999995
1520
+ - type: ndcg_at_100
1521
+ value: 46.068
1522
+ - type: ndcg_at_1000
1523
+ value: 47.3
1524
+ - type: ndcg_at_3
1525
+ value: 32.416
1526
+ - type: ndcg_at_5
1527
+ value: 36.345
1528
+ - type: precision_at_1
1529
+ value: 21.776999999999997
1530
+ - type: precision_at_10
1531
+ value: 6.433
1532
+ - type: precision_at_100
1533
+ value: 0.932
1534
+ - type: precision_at_1000
1535
+ value: 0.104
1536
+ - type: precision_at_3
1537
+ value: 13.897
1538
+ - type: precision_at_5
1539
+ value: 10.324
1540
+ - type: recall_at_1
1541
+ value: 21.166
1542
+ - type: recall_at_10
1543
+ value: 61.587
1544
+ - type: recall_at_100
1545
+ value: 88.251
1546
+ - type: recall_at_1000
1547
+ value: 97.727
1548
+ - type: recall_at_3
1549
+ value: 40.196
1550
+ - type: recall_at_5
1551
+ value: 49.611
1552
+ - task:
1553
+ type: Classification
1554
+ dataset:
1555
+ type: mteb/mtop_domain
1556
+ name: MTEB MTOPDomainClassification (en)
1557
+ config: en
1558
+ split: test
1559
+ revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
1560
+ metrics:
1561
+ - type: accuracy
1562
+ value: 93.04605563155496
1563
+ - type: f1
1564
+ value: 92.78007303978372
1565
+ - task:
1566
+ type: Classification
1567
+ dataset:
1568
+ type: mteb/mtop_intent
1569
+ name: MTEB MTOPIntentClassification (en)
1570
+ config: en
1571
+ split: test
1572
+ revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
1573
+ metrics:
1574
+ - type: accuracy
1575
+ value: 69.65116279069767
1576
+ - type: f1
1577
+ value: 52.75775172527262
1578
+ - task:
1579
+ type: Classification
1580
+ dataset:
1581
+ type: mteb/amazon_massive_intent
1582
+ name: MTEB MassiveIntentClassification (en)
1583
+ config: en
1584
+ split: test
1585
+ revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
1586
+ metrics:
1587
+ - type: accuracy
1588
+ value: 70.34633490248822
1589
+ - type: f1
1590
+ value: 68.15345065392562
1591
+ - task:
1592
+ type: Classification
1593
+ dataset:
1594
+ type: mteb/amazon_massive_scenario
1595
+ name: MTEB MassiveScenarioClassification (en)
1596
+ config: en
1597
+ split: test
1598
+ revision: 7d571f92784cd94a019292a1f45445077d0ef634
1599
+ metrics:
1600
+ - type: accuracy
1601
+ value: 75.63887020847343
1602
+ - type: f1
1603
+ value: 76.08074680233685
1604
+ - task:
1605
+ type: Clustering
1606
+ dataset:
1607
+ type: mteb/medrxiv-clustering-p2p
1608
+ name: MTEB MedrxivClusteringP2P
1609
+ config: default
1610
+ split: test
1611
+ revision: e7a26af6f3ae46b30dde8737f02c07b1505bcc73
1612
+ metrics:
1613
+ - type: v_measure
1614
+ value: 33.77933406071333
1615
+ - task:
1616
+ type: Clustering
1617
+ dataset:
1618
+ type: mteb/medrxiv-clustering-s2s
1619
+ name: MTEB MedrxivClusteringS2S
1620
+ config: default
1621
+ split: test
1622
+ revision: 35191c8c0dca72d8ff3efcd72aa802307d469663
1623
+ metrics:
1624
+ - type: v_measure
1625
+ value: 32.06504927238196
1626
+ - task:
1627
+ type: Reranking
1628
+ dataset:
1629
+ type: mteb/mind_small
1630
+ name: MTEB MindSmallReranking
1631
+ config: default
1632
+ split: test
1633
+ revision: 3bdac13927fdc888b903db93b2ffdbd90b295a69
1634
+ metrics:
1635
+ - type: map
1636
+ value: 32.20682480490871
1637
+ - type: mrr
1638
+ value: 33.41462721527003
1639
+ - task:
1640
+ type: Retrieval
1641
+ dataset:
1642
+ type: nfcorpus
1643
+ name: MTEB NFCorpus
1644
+ config: default
1645
+ split: test
1646
+ revision: None
1647
+ metrics:
1648
+ - type: map_at_1
1649
+ value: 5.548
1650
+ - type: map_at_10
1651
+ value: 13.086999999999998
1652
+ - type: map_at_100
1653
+ value: 16.698
1654
+ - type: map_at_1000
1655
+ value: 18.151999999999997
1656
+ - type: map_at_3
1657
+ value: 9.576
1658
+ - type: map_at_5
1659
+ value: 11.175
1660
+ - type: mrr_at_1
1661
+ value: 44.272
1662
+ - type: mrr_at_10
1663
+ value: 53.635999999999996
1664
+ - type: mrr_at_100
1665
+ value: 54.228
1666
+ - type: mrr_at_1000
1667
+ value: 54.26499999999999
1668
+ - type: mrr_at_3
1669
+ value: 51.754
1670
+ - type: mrr_at_5
1671
+ value: 53.086
1672
+ - type: ndcg_at_1
1673
+ value: 42.724000000000004
1674
+ - type: ndcg_at_10
1675
+ value: 34.769
1676
+ - type: ndcg_at_100
1677
+ value: 32.283
1678
+ - type: ndcg_at_1000
1679
+ value: 40.843
1680
+ - type: ndcg_at_3
1681
+ value: 39.852
1682
+ - type: ndcg_at_5
1683
+ value: 37.858999999999995
1684
+ - type: precision_at_1
1685
+ value: 44.272
1686
+ - type: precision_at_10
1687
+ value: 26.068
1688
+ - type: precision_at_100
1689
+ value: 8.328000000000001
1690
+ - type: precision_at_1000
1691
+ value: 2.1
1692
+ - type: precision_at_3
1693
+ value: 37.874
1694
+ - type: precision_at_5
1695
+ value: 33.065
1696
+ - type: recall_at_1
1697
+ value: 5.548
1698
+ - type: recall_at_10
1699
+ value: 16.936999999999998
1700
+ - type: recall_at_100
1701
+ value: 33.72
1702
+ - type: recall_at_1000
1703
+ value: 64.348
1704
+ - type: recall_at_3
1705
+ value: 10.764999999999999
1706
+ - type: recall_at_5
1707
+ value: 13.361
1708
+ - task:
1709
+ type: Retrieval
1710
+ dataset:
1711
+ type: nq
1712
+ name: MTEB NQ
1713
+ config: default
1714
+ split: test
1715
+ revision: None
1716
+ metrics:
1717
+ - type: map_at_1
1718
+ value: 28.008
1719
+ - type: map_at_10
1720
+ value: 42.675000000000004
1721
+ - type: map_at_100
1722
+ value: 43.85
1723
+ - type: map_at_1000
1724
+ value: 43.884
1725
+ - type: map_at_3
1726
+ value: 38.286
1727
+ - type: map_at_5
1728
+ value: 40.78
1729
+ - type: mrr_at_1
1730
+ value: 31.518
1731
+ - type: mrr_at_10
1732
+ value: 45.015
1733
+ - type: mrr_at_100
1734
+ value: 45.924
1735
+ - type: mrr_at_1000
1736
+ value: 45.946999999999996
1737
+ - type: mrr_at_3
1738
+ value: 41.348
1739
+ - type: mrr_at_5
1740
+ value: 43.428
1741
+ - type: ndcg_at_1
1742
+ value: 31.489
1743
+ - type: ndcg_at_10
1744
+ value: 50.285999999999994
1745
+ - type: ndcg_at_100
1746
+ value: 55.291999999999994
1747
+ - type: ndcg_at_1000
1748
+ value: 56.05
1749
+ - type: ndcg_at_3
1750
+ value: 41.976
1751
+ - type: ndcg_at_5
1752
+ value: 46.103
1753
+ - type: precision_at_1
1754
+ value: 31.489
1755
+ - type: precision_at_10
1756
+ value: 8.456
1757
+ - type: precision_at_100
1758
+ value: 1.125
1759
+ - type: precision_at_1000
1760
+ value: 0.12
1761
+ - type: precision_at_3
1762
+ value: 19.09
1763
+ - type: precision_at_5
1764
+ value: 13.841000000000001
1765
+ - type: recall_at_1
1766
+ value: 28.008
1767
+ - type: recall_at_10
1768
+ value: 71.21499999999999
1769
+ - type: recall_at_100
1770
+ value: 92.99
1771
+ - type: recall_at_1000
1772
+ value: 98.578
1773
+ - type: recall_at_3
1774
+ value: 49.604
1775
+ - type: recall_at_5
1776
+ value: 59.094
1777
+ - task:
1778
+ type: Retrieval
1779
+ dataset:
1780
+ type: quora
1781
+ name: MTEB QuoraRetrieval
1782
+ config: default
1783
+ split: test
1784
+ revision: None
1785
+ metrics:
1786
+ - type: map_at_1
1787
+ value: 70.351
1788
+ - type: map_at_10
1789
+ value: 84.163
1790
+ - type: map_at_100
1791
+ value: 84.785
1792
+ - type: map_at_1000
1793
+ value: 84.801
1794
+ - type: map_at_3
1795
+ value: 81.16
1796
+ - type: map_at_5
1797
+ value: 83.031
1798
+ - type: mrr_at_1
1799
+ value: 80.96
1800
+ - type: mrr_at_10
1801
+ value: 87.241
1802
+ - type: mrr_at_100
1803
+ value: 87.346
1804
+ - type: mrr_at_1000
1805
+ value: 87.347
1806
+ - type: mrr_at_3
1807
+ value: 86.25699999999999
1808
+ - type: mrr_at_5
1809
+ value: 86.907
1810
+ - type: ndcg_at_1
1811
+ value: 80.97
1812
+ - type: ndcg_at_10
1813
+ value: 88.017
1814
+ - type: ndcg_at_100
1815
+ value: 89.241
1816
+ - type: ndcg_at_1000
1817
+ value: 89.34299999999999
1818
+ - type: ndcg_at_3
1819
+ value: 85.053
1820
+ - type: ndcg_at_5
1821
+ value: 86.663
1822
+ - type: precision_at_1
1823
+ value: 80.97
1824
+ - type: precision_at_10
1825
+ value: 13.358
1826
+ - type: precision_at_100
1827
+ value: 1.525
1828
+ - type: precision_at_1000
1829
+ value: 0.157
1830
+ - type: precision_at_3
1831
+ value: 37.143
1832
+ - type: precision_at_5
1833
+ value: 24.451999999999998
1834
+ - type: recall_at_1
1835
+ value: 70.351
1836
+ - type: recall_at_10
1837
+ value: 95.39800000000001
1838
+ - type: recall_at_100
1839
+ value: 99.55199999999999
1840
+ - type: recall_at_1000
1841
+ value: 99.978
1842
+ - type: recall_at_3
1843
+ value: 86.913
1844
+ - type: recall_at_5
1845
+ value: 91.448
1846
+ - task:
1847
+ type: Clustering
1848
+ dataset:
1849
+ type: mteb/reddit-clustering
1850
+ name: MTEB RedditClustering
1851
+ config: default
1852
+ split: test
1853
+ revision: 24640382cdbf8abc73003fb0fa6d111a705499eb
1854
+ metrics:
1855
+ - type: v_measure
1856
+ value: 55.62406719814139
1857
+ - task:
1858
+ type: Clustering
1859
+ dataset:
1860
+ type: mteb/reddit-clustering-p2p
1861
+ name: MTEB RedditClusteringP2P
1862
+ config: default
1863
+ split: test
1864
+ revision: 282350215ef01743dc01b456c7f5241fa8937f16
1865
+ metrics:
1866
+ - type: v_measure
1867
+ value: 61.386700035141736
1868
+ - task:
1869
+ type: Retrieval
1870
+ dataset:
1871
+ type: scidocs
1872
+ name: MTEB SCIDOCS
1873
+ config: default
1874
+ split: test
1875
+ revision: None
1876
+ metrics:
1877
+ - type: map_at_1
1878
+ value: 4.618
1879
+ - type: map_at_10
1880
+ value: 12.920000000000002
1881
+ - type: map_at_100
1882
+ value: 15.304
1883
+ - type: map_at_1000
1884
+ value: 15.656999999999998
1885
+ - type: map_at_3
1886
+ value: 9.187
1887
+ - type: map_at_5
1888
+ value: 10.937
1889
+ - type: mrr_at_1
1890
+ value: 22.8
1891
+ - type: mrr_at_10
1892
+ value: 35.13
1893
+ - type: mrr_at_100
1894
+ value: 36.239
1895
+ - type: mrr_at_1000
1896
+ value: 36.291000000000004
1897
+ - type: mrr_at_3
1898
+ value: 31.917
1899
+ - type: mrr_at_5
1900
+ value: 33.787
1901
+ - type: ndcg_at_1
1902
+ value: 22.8
1903
+ - type: ndcg_at_10
1904
+ value: 21.382
1905
+ - type: ndcg_at_100
1906
+ value: 30.257
1907
+ - type: ndcg_at_1000
1908
+ value: 36.001
1909
+ - type: ndcg_at_3
1910
+ value: 20.43
1911
+ - type: ndcg_at_5
1912
+ value: 17.622
1913
+ - type: precision_at_1
1914
+ value: 22.8
1915
+ - type: precision_at_10
1916
+ value: 11.26
1917
+ - type: precision_at_100
1918
+ value: 2.405
1919
+ - type: precision_at_1000
1920
+ value: 0.377
1921
+ - type: precision_at_3
1922
+ value: 19.633
1923
+ - type: precision_at_5
1924
+ value: 15.68
1925
+ - type: recall_at_1
1926
+ value: 4.618
1927
+ - type: recall_at_10
1928
+ value: 22.811999999999998
1929
+ - type: recall_at_100
1930
+ value: 48.787000000000006
1931
+ - type: recall_at_1000
1932
+ value: 76.63799999999999
1933
+ - type: recall_at_3
1934
+ value: 11.952
1935
+ - type: recall_at_5
1936
+ value: 15.892000000000001
1937
+ - task:
1938
+ type: STS
1939
+ dataset:
1940
+ type: mteb/sickr-sts
1941
+ name: MTEB SICK-R
1942
+ config: default
1943
+ split: test
1944
+ revision: a6ea5a8cab320b040a23452cc28066d9beae2cee
1945
+ metrics:
1946
+ - type: cos_sim_pearson
1947
+ value: 84.01529458252244
1948
+ - type: cos_sim_spearman
1949
+ value: 77.92985224770254
1950
+ - type: euclidean_pearson
1951
+ value: 81.04251429422487
1952
+ - type: euclidean_spearman
1953
+ value: 77.92838490549133
1954
+ - type: manhattan_pearson
1955
+ value: 80.95892251458979
1956
+ - type: manhattan_spearman
1957
+ value: 77.81028089705941
1958
+ - task:
1959
+ type: STS
1960
+ dataset:
1961
+ type: mteb/sts12-sts
1962
+ name: MTEB STS12
1963
+ config: default
1964
+ split: test
1965
+ revision: a0d554a64d88156834ff5ae9920b964011b16384
1966
+ metrics:
1967
+ - type: cos_sim_pearson
1968
+ value: 83.97885282534388
1969
+ - type: cos_sim_spearman
1970
+ value: 75.1221970851712
1971
+ - type: euclidean_pearson
1972
+ value: 80.34455956720097
1973
+ - type: euclidean_spearman
1974
+ value: 74.5894274239938
1975
+ - type: manhattan_pearson
1976
+ value: 80.38999766325465
1977
+ - type: manhattan_spearman
1978
+ value: 74.68524557166975
1979
+ - task:
1980
+ type: STS
1981
+ dataset:
1982
+ type: mteb/sts13-sts
1983
+ name: MTEB STS13
1984
+ config: default
1985
+ split: test
1986
+ revision: 7e90230a92c190f1bf69ae9002b8cea547a64cca
1987
+ metrics:
1988
+ - type: cos_sim_pearson
1989
+ value: 82.95746064915672
1990
+ - type: cos_sim_spearman
1991
+ value: 85.08683458043946
1992
+ - type: euclidean_pearson
1993
+ value: 84.56699492836385
1994
+ - type: euclidean_spearman
1995
+ value: 85.66089116133713
1996
+ - type: manhattan_pearson
1997
+ value: 84.47553323458541
1998
+ - type: manhattan_spearman
1999
+ value: 85.56142206781472
2000
+ - task:
2001
+ type: STS
2002
+ dataset:
2003
+ type: mteb/sts14-sts
2004
+ name: MTEB STS14
2005
+ config: default
2006
+ split: test
2007
+ revision: 6031580fec1f6af667f0bd2da0a551cf4f0b2375
2008
+ metrics:
2009
+ - type: cos_sim_pearson
2010
+ value: 82.71377893595067
2011
+ - type: cos_sim_spearman
2012
+ value: 81.03453291428589
2013
+ - type: euclidean_pearson
2014
+ value: 82.57136298308613
2015
+ - type: euclidean_spearman
2016
+ value: 81.15839961890875
2017
+ - type: manhattan_pearson
2018
+ value: 82.55157879373837
2019
+ - type: manhattan_spearman
2020
+ value: 81.1540163767054
2021
+ - task:
2022
+ type: STS
2023
+ dataset:
2024
+ type: mteb/sts15-sts
2025
+ name: MTEB STS15
2026
+ config: default
2027
+ split: test
2028
+ revision: ae752c7c21bf194d8b67fd573edf7ae58183cbe3
2029
+ metrics:
2030
+ - type: cos_sim_pearson
2031
+ value: 86.64197832372373
2032
+ - type: cos_sim_spearman
2033
+ value: 88.31966852492485
2034
+ - type: euclidean_pearson
2035
+ value: 87.98692129976983
2036
+ - type: euclidean_spearman
2037
+ value: 88.6247340837856
2038
+ - type: manhattan_pearson
2039
+ value: 87.90437827826412
2040
+ - type: manhattan_spearman
2041
+ value: 88.56278787131457
2042
+ - task:
2043
+ type: STS
2044
+ dataset:
2045
+ type: mteb/sts16-sts
2046
+ name: MTEB STS16
2047
+ config: default
2048
+ split: test
2049
+ revision: 4d8694f8f0e0100860b497b999b3dbed754a0513
2050
+ metrics:
2051
+ - type: cos_sim_pearson
2052
+ value: 81.84159950146693
2053
+ - type: cos_sim_spearman
2054
+ value: 83.90678384140168
2055
+ - type: euclidean_pearson
2056
+ value: 83.19005018860221
2057
+ - type: euclidean_spearman
2058
+ value: 84.16260415876295
2059
+ - type: manhattan_pearson
2060
+ value: 83.05030612994494
2061
+ - type: manhattan_spearman
2062
+ value: 83.99605629718336
2063
+ - task:
2064
+ type: STS
2065
+ dataset:
2066
+ type: mteb/sts17-crosslingual-sts
2067
+ name: MTEB STS17 (en-en)
2068
+ config: en-en
2069
+ split: test
2070
+ revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
2071
+ metrics:
2072
+ - type: cos_sim_pearson
2073
+ value: 87.49935350176666
2074
+ - type: cos_sim_spearman
2075
+ value: 87.59086606735383
2076
+ - type: euclidean_pearson
2077
+ value: 88.06537181129983
2078
+ - type: euclidean_spearman
2079
+ value: 87.6687448086014
2080
+ - type: manhattan_pearson
2081
+ value: 87.96599131972935
2082
+ - type: manhattan_spearman
2083
+ value: 87.63295748969642
2084
+ - task:
2085
+ type: STS
2086
+ dataset:
2087
+ type: mteb/sts22-crosslingual-sts
2088
+ name: MTEB STS22 (en)
2089
+ config: en
2090
+ split: test
2091
+ revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
2092
+ metrics:
2093
+ - type: cos_sim_pearson
2094
+ value: 67.68232799482763
2095
+ - type: cos_sim_spearman
2096
+ value: 67.99930378085793
2097
+ - type: euclidean_pearson
2098
+ value: 68.50275360001696
2099
+ - type: euclidean_spearman
2100
+ value: 67.81588179309259
2101
+ - type: manhattan_pearson
2102
+ value: 68.5892154749763
2103
+ - type: manhattan_spearman
2104
+ value: 67.84357259640682
2105
+ - task:
2106
+ type: STS
2107
+ dataset:
2108
+ type: mteb/stsbenchmark-sts
2109
+ name: MTEB STSBenchmark
2110
+ config: default
2111
+ split: test
2112
+ revision: b0fddb56ed78048fa8b90373c8a3cfc37b684831
2113
+ metrics:
2114
+ - type: cos_sim_pearson
2115
+ value: 84.37049618406554
2116
+ - type: cos_sim_spearman
2117
+ value: 85.57014313159492
2118
+ - type: euclidean_pearson
2119
+ value: 85.57469513908282
2120
+ - type: euclidean_spearman
2121
+ value: 85.661948135258
2122
+ - type: manhattan_pearson
2123
+ value: 85.36866831229028
2124
+ - type: manhattan_spearman
2125
+ value: 85.5043455368843
2126
+ - task:
2127
+ type: Reranking
2128
+ dataset:
2129
+ type: mteb/scidocs-reranking
2130
+ name: MTEB SciDocsRR
2131
+ config: default
2132
+ split: test
2133
+ revision: d3c5e1fc0b855ab6097bf1cda04dd73947d7caab
2134
+ metrics:
2135
+ - type: map
2136
+ value: 84.83259065376154
2137
+ - type: mrr
2138
+ value: 95.58455433455433
2139
+ - task:
2140
+ type: Retrieval
2141
+ dataset:
2142
+ type: scifact
2143
+ name: MTEB SciFact
2144
+ config: default
2145
+ split: test
2146
+ revision: None
2147
+ metrics:
2148
+ - type: map_at_1
2149
+ value: 58.817
2150
+ - type: map_at_10
2151
+ value: 68.459
2152
+ - type: map_at_100
2153
+ value: 68.951
2154
+ - type: map_at_1000
2155
+ value: 68.979
2156
+ - type: map_at_3
2157
+ value: 65.791
2158
+ - type: map_at_5
2159
+ value: 67.583
2160
+ - type: mrr_at_1
2161
+ value: 61.667
2162
+ - type: mrr_at_10
2163
+ value: 69.368
2164
+ - type: mrr_at_100
2165
+ value: 69.721
2166
+ - type: mrr_at_1000
2167
+ value: 69.744
2168
+ - type: mrr_at_3
2169
+ value: 67.278
2170
+ - type: mrr_at_5
2171
+ value: 68.611
2172
+ - type: ndcg_at_1
2173
+ value: 61.667
2174
+ - type: ndcg_at_10
2175
+ value: 72.70100000000001
2176
+ - type: ndcg_at_100
2177
+ value: 74.928
2178
+ - type: ndcg_at_1000
2179
+ value: 75.553
2180
+ - type: ndcg_at_3
2181
+ value: 68.203
2182
+ - type: ndcg_at_5
2183
+ value: 70.804
2184
+ - type: precision_at_1
2185
+ value: 61.667
2186
+ - type: precision_at_10
2187
+ value: 9.533
2188
+ - type: precision_at_100
2189
+ value: 1.077
2190
+ - type: precision_at_1000
2191
+ value: 0.11299999999999999
2192
+ - type: precision_at_3
2193
+ value: 26.444000000000003
2194
+ - type: precision_at_5
2195
+ value: 17.599999999999998
2196
+ - type: recall_at_1
2197
+ value: 58.817
2198
+ - type: recall_at_10
2199
+ value: 84.789
2200
+ - type: recall_at_100
2201
+ value: 95.0
2202
+ - type: recall_at_1000
2203
+ value: 99.667
2204
+ - type: recall_at_3
2205
+ value: 72.8
2206
+ - type: recall_at_5
2207
+ value: 79.294
2208
+ - task:
2209
+ type: PairClassification
2210
+ dataset:
2211
+ type: mteb/sprintduplicatequestions-pairclassification
2212
+ name: MTEB SprintDuplicateQuestions
2213
+ config: default
2214
+ split: test
2215
+ revision: d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46
2216
+ metrics:
2217
+ - type: cos_sim_accuracy
2218
+ value: 99.8108910891089
2219
+ - type: cos_sim_ap
2220
+ value: 95.5743678558349
2221
+ - type: cos_sim_f1
2222
+ value: 90.43133366385722
2223
+ - type: cos_sim_precision
2224
+ value: 89.67551622418878
2225
+ - type: cos_sim_recall
2226
+ value: 91.2
2227
+ - type: dot_accuracy
2228
+ value: 99.75841584158415
2229
+ - type: dot_ap
2230
+ value: 94.00786363627253
2231
+ - type: dot_f1
2232
+ value: 87.51910341314316
2233
+ - type: dot_precision
2234
+ value: 89.20041536863967
2235
+ - type: dot_recall
2236
+ value: 85.9
2237
+ - type: euclidean_accuracy
2238
+ value: 99.81485148514851
2239
+ - type: euclidean_ap
2240
+ value: 95.4752113136905
2241
+ - type: euclidean_f1
2242
+ value: 90.44334975369456
2243
+ - type: euclidean_precision
2244
+ value: 89.126213592233
2245
+ - type: euclidean_recall
2246
+ value: 91.8
2247
+ - type: manhattan_accuracy
2248
+ value: 99.81584158415842
2249
+ - type: manhattan_ap
2250
+ value: 95.5163172682464
2251
+ - type: manhattan_f1
2252
+ value: 90.51987767584097
2253
+ - type: manhattan_precision
2254
+ value: 92.3076923076923
2255
+ - type: manhattan_recall
2256
+ value: 88.8
2257
+ - type: max_accuracy
2258
+ value: 99.81584158415842
2259
+ - type: max_ap
2260
+ value: 95.5743678558349
2261
+ - type: max_f1
2262
+ value: 90.51987767584097
2263
+ - task:
2264
+ type: Clustering
2265
+ dataset:
2266
+ type: mteb/stackexchange-clustering
2267
+ name: MTEB StackExchangeClustering
2268
+ config: default
2269
+ split: test
2270
+ revision: 6cbc1f7b2bc0622f2e39d2c77fa502909748c259
2271
+ metrics:
2272
+ - type: v_measure
2273
+ value: 62.63235986949449
2274
+ - task:
2275
+ type: Clustering
2276
+ dataset:
2277
+ type: mteb/stackexchange-clustering-p2p
2278
+ name: MTEB StackExchangeClusteringP2P
2279
+ config: default
2280
+ split: test
2281
+ revision: 815ca46b2622cec33ccafc3735d572c266efdb44
2282
+ metrics:
2283
+ - type: v_measure
2284
+ value: 36.334795589585575
2285
+ - task:
2286
+ type: Reranking
2287
+ dataset:
2288
+ type: mteb/stackoverflowdupquestions-reranking
2289
+ name: MTEB StackOverflowDupQuestions
2290
+ config: default
2291
+ split: test
2292
+ revision: e185fbe320c72810689fc5848eb6114e1ef5ec69
2293
+ metrics:
2294
+ - type: map
2295
+ value: 52.02955214518782
2296
+ - type: mrr
2297
+ value: 52.8004838298956
2298
+ - task:
2299
+ type: Summarization
2300
+ dataset:
2301
+ type: mteb/summeval
2302
+ name: MTEB SummEval
2303
+ config: default
2304
+ split: test
2305
+ revision: cda12ad7615edc362dbf25a00fdd61d3b1eaf93c
2306
+ metrics:
2307
+ - type: cos_sim_pearson
2308
+ value: 30.63769566275453
2309
+ - type: cos_sim_spearman
2310
+ value: 30.422379185989335
2311
+ - type: dot_pearson
2312
+ value: 26.88493071882256
2313
+ - type: dot_spearman
2314
+ value: 26.505249740971305
2315
+ - task:
2316
+ type: Retrieval
2317
+ dataset:
2318
+ type: trec-covid
2319
+ name: MTEB TRECCOVID
2320
+ config: default
2321
+ split: test
2322
+ revision: None
2323
+ metrics:
2324
+ - type: map_at_1
2325
+ value: 0.21
2326
+ - type: map_at_10
2327
+ value: 1.654
2328
+ - type: map_at_100
2329
+ value: 10.095
2330
+ - type: map_at_1000
2331
+ value: 25.808999999999997
2332
+ - type: map_at_3
2333
+ value: 0.594
2334
+ - type: map_at_5
2335
+ value: 0.9289999999999999
2336
+ - type: mrr_at_1
2337
+ value: 78.0
2338
+ - type: mrr_at_10
2339
+ value: 87.019
2340
+ - type: mrr_at_100
2341
+ value: 87.019
2342
+ - type: mrr_at_1000
2343
+ value: 87.019
2344
+ - type: mrr_at_3
2345
+ value: 86.333
2346
+ - type: mrr_at_5
2347
+ value: 86.733
2348
+ - type: ndcg_at_1
2349
+ value: 73.0
2350
+ - type: ndcg_at_10
2351
+ value: 66.52900000000001
2352
+ - type: ndcg_at_100
2353
+ value: 53.433
2354
+ - type: ndcg_at_1000
2355
+ value: 51.324000000000005
2356
+ - type: ndcg_at_3
2357
+ value: 72.02199999999999
2358
+ - type: ndcg_at_5
2359
+ value: 69.696
2360
+ - type: precision_at_1
2361
+ value: 78.0
2362
+ - type: precision_at_10
2363
+ value: 70.39999999999999
2364
+ - type: precision_at_100
2365
+ value: 55.46
2366
+ - type: precision_at_1000
2367
+ value: 22.758
2368
+ - type: precision_at_3
2369
+ value: 76.667
2370
+ - type: precision_at_5
2371
+ value: 74.0
2372
+ - type: recall_at_1
2373
+ value: 0.21
2374
+ - type: recall_at_10
2375
+ value: 1.8849999999999998
2376
+ - type: recall_at_100
2377
+ value: 13.801
2378
+ - type: recall_at_1000
2379
+ value: 49.649
2380
+ - type: recall_at_3
2381
+ value: 0.632
2382
+ - type: recall_at_5
2383
+ value: 1.009
2384
+ - task:
2385
+ type: Retrieval
2386
+ dataset:
2387
+ type: webis-touche2020
2388
+ name: MTEB Touche2020
2389
+ config: default
2390
+ split: test
2391
+ revision: None
2392
+ metrics:
2393
+ - type: map_at_1
2394
+ value: 1.797
2395
+ - type: map_at_10
2396
+ value: 9.01
2397
+ - type: map_at_100
2398
+ value: 14.682
2399
+ - type: map_at_1000
2400
+ value: 16.336000000000002
2401
+ - type: map_at_3
2402
+ value: 4.546
2403
+ - type: map_at_5
2404
+ value: 5.9270000000000005
2405
+ - type: mrr_at_1
2406
+ value: 24.490000000000002
2407
+ - type: mrr_at_10
2408
+ value: 41.156
2409
+ - type: mrr_at_100
2410
+ value: 42.392
2411
+ - type: mrr_at_1000
2412
+ value: 42.408
2413
+ - type: mrr_at_3
2414
+ value: 38.775999999999996
2415
+ - type: mrr_at_5
2416
+ value: 40.102
2417
+ - type: ndcg_at_1
2418
+ value: 21.429000000000002
2419
+ - type: ndcg_at_10
2420
+ value: 22.222
2421
+ - type: ndcg_at_100
2422
+ value: 34.405
2423
+ - type: ndcg_at_1000
2424
+ value: 46.599000000000004
2425
+ - type: ndcg_at_3
2426
+ value: 25.261
2427
+ - type: ndcg_at_5
2428
+ value: 22.695999999999998
2429
+ - type: precision_at_1
2430
+ value: 24.490000000000002
2431
+ - type: precision_at_10
2432
+ value: 19.796
2433
+ - type: precision_at_100
2434
+ value: 7.306
2435
+ - type: precision_at_1000
2436
+ value: 1.5350000000000001
2437
+ - type: precision_at_3
2438
+ value: 27.211000000000002
2439
+ - type: precision_at_5
2440
+ value: 22.857
2441
+ - type: recall_at_1
2442
+ value: 1.797
2443
+ - type: recall_at_10
2444
+ value: 15.706000000000001
2445
+ - type: recall_at_100
2446
+ value: 46.412
2447
+ - type: recall_at_1000
2448
+ value: 83.159
2449
+ - type: recall_at_3
2450
+ value: 6.1370000000000005
2451
+ - type: recall_at_5
2452
+ value: 8.599
2453
+ - task:
2454
+ type: Classification
2455
+ dataset:
2456
+ type: mteb/toxic_conversations_50k
2457
+ name: MTEB ToxicConversationsClassification
2458
+ config: default
2459
+ split: test
2460
+ revision: d7c0de2777da35d6aae2200a62c6e0e5af397c4c
2461
+ metrics:
2462
+ - type: accuracy
2463
+ value: 70.3302
2464
+ - type: ap
2465
+ value: 14.169121204575601
2466
+ - type: f1
2467
+ value: 54.229345975274235
2468
+ - task:
2469
+ type: Classification
2470
+ dataset:
2471
+ type: mteb/tweet_sentiment_extraction
2472
+ name: MTEB TweetSentimentExtractionClassification
2473
+ config: default
2474
+ split: test
2475
+ revision: d604517c81ca91fe16a244d1248fc021f9ecee7a
2476
+ metrics:
2477
+ - type: accuracy
2478
+ value: 58.22297679683077
2479
+ - type: f1
2480
+ value: 58.62984908377875
2481
+ - task:
2482
+ type: Clustering
2483
+ dataset:
2484
+ type: mteb/twentynewsgroups-clustering
2485
+ name: MTEB TwentyNewsgroupsClustering
2486
+ config: default
2487
+ split: test
2488
+ revision: 6125ec4e24fa026cec8a478383ee943acfbd5449
2489
+ metrics:
2490
+ - type: v_measure
2491
+ value: 49.952922428464255
2492
+ - task:
2493
+ type: PairClassification
2494
+ dataset:
2495
+ type: mteb/twittersemeval2015-pairclassification
2496
+ name: MTEB TwitterSemEval2015
2497
+ config: default
2498
+ split: test
2499
+ revision: 70970daeab8776df92f5ea462b6173c0b46fd2d1
2500
+ metrics:
2501
+ - type: cos_sim_accuracy
2502
+ value: 84.68140907194373
2503
+ - type: cos_sim_ap
2504
+ value: 70.12180123666836
2505
+ - type: cos_sim_f1
2506
+ value: 65.77501791258658
2507
+ - type: cos_sim_precision
2508
+ value: 60.07853403141361
2509
+ - type: cos_sim_recall
2510
+ value: 72.66490765171504
2511
+ - type: dot_accuracy
2512
+ value: 81.92167848840674
2513
+ - type: dot_ap
2514
+ value: 60.49837581423469
2515
+ - type: dot_f1
2516
+ value: 58.44186046511628
2517
+ - type: dot_precision
2518
+ value: 52.24532224532224
2519
+ - type: dot_recall
2520
+ value: 66.3060686015831
2521
+ - type: euclidean_accuracy
2522
+ value: 84.73505394289802
2523
+ - type: euclidean_ap
2524
+ value: 70.3278904593286
2525
+ - type: euclidean_f1
2526
+ value: 65.98851124940161
2527
+ - type: euclidean_precision
2528
+ value: 60.38107752956636
2529
+ - type: euclidean_recall
2530
+ value: 72.74406332453826
2531
+ - type: manhattan_accuracy
2532
+ value: 84.73505394289802
2533
+ - type: manhattan_ap
2534
+ value: 70.00737738537337
2535
+ - type: manhattan_f1
2536
+ value: 65.80150784822642
2537
+ - type: manhattan_precision
2538
+ value: 61.892583120204606
2539
+ - type: manhattan_recall
2540
+ value: 70.23746701846966
2541
+ - type: max_accuracy
2542
+ value: 84.73505394289802
2543
+ - type: max_ap
2544
+ value: 70.3278904593286
2545
+ - type: max_f1
2546
+ value: 65.98851124940161
2547
+ - task:
2548
+ type: PairClassification
2549
+ dataset:
2550
+ type: mteb/twitterurlcorpus-pairclassification
2551
+ name: MTEB TwitterURLCorpus
2552
+ config: default
2553
+ split: test
2554
+ revision: 8b6510b0b1fa4e4c4f879467980e9be563ec1cdf
2555
+ metrics:
2556
+ - type: cos_sim_accuracy
2557
+ value: 88.44258159661582
2558
+ - type: cos_sim_ap
2559
+ value: 84.91926704880888
2560
+ - type: cos_sim_f1
2561
+ value: 77.07651086632926
2562
+ - type: cos_sim_precision
2563
+ value: 74.5894554883319
2564
+ - type: cos_sim_recall
2565
+ value: 79.73514012935017
2566
+ - type: dot_accuracy
2567
+ value: 85.88116583226608
2568
+ - type: dot_ap
2569
+ value: 78.9753854779923
2570
+ - type: dot_f1
2571
+ value: 72.17757637979255
2572
+ - type: dot_precision
2573
+ value: 66.80647486729143
2574
+ - type: dot_recall
2575
+ value: 78.48783492454572
2576
+ - type: euclidean_accuracy
2577
+ value: 88.5299025885823
2578
+ - type: euclidean_ap
2579
+ value: 85.08006075642194
2580
+ - type: euclidean_f1
2581
+ value: 77.29637336504163
2582
+ - type: euclidean_precision
2583
+ value: 74.69836253950014
2584
+ - type: euclidean_recall
2585
+ value: 80.08161379735141
2586
+ - type: manhattan_accuracy
2587
+ value: 88.55124771995187
2588
+ - type: manhattan_ap
2589
+ value: 85.00941529932851
2590
+ - type: manhattan_f1
2591
+ value: 77.33100233100232
2592
+ - type: manhattan_precision
2593
+ value: 73.37572573956317
2594
+ - type: manhattan_recall
2595
+ value: 81.73698798891284
2596
+ - type: max_accuracy
2597
+ value: 88.55124771995187
2598
+ - type: max_ap
2599
+ value: 85.08006075642194
2600
+ - type: max_f1
2601
+ value: 77.33100233100232
2602
+ language:
2603
+ - en
2604
+ license: mit
2605
+ ---
2606
+
2607
+ # gte-small
2608
+
2609
+ General Text Embeddings (GTE) model. [Towards General Text Embeddings with Multi-stage Contrastive Learning](https://arxiv.org/abs/2308.03281)
2610
+
2611
+ The GTE models are trained by Alibaba DAMO Academy. They are mainly based on the BERT framework and currently offer three different sizes of models, including [GTE-large](https://huggingface.co/thenlper/gte-large), [GTE-base](https://huggingface.co/thenlper/gte-base), and [GTE-small](https://huggingface.co/thenlper/gte-small). The GTE models are trained on a large-scale corpus of relevance text pairs, covering a wide range of domains and scenarios. This enables the GTE models to be applied to various downstream tasks of text embeddings, including **information retrieval**, **semantic textual similarity**, **text reranking**, etc.
2612
+
2613
+ ## Metrics
2614
+
2615
+ We compared the performance of the GTE models with other popular text embedding models on the MTEB benchmark. For more detailed comparison results, please refer to the [MTEB leaderboard](https://huggingface.co/spaces/mteb/leaderboard).
2616
+
2617
+
2618
+
2619
+ | Model Name | Model Size (GB) | Dimension | Sequence Length | Average (56) | Clustering (11) | Pair Classification (3) | Reranking (4) | Retrieval (15) | STS (10) | Summarization (1) | Classification (12) |
2620
+ |:----:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
2621
+ | [**gte-large**](https://huggingface.co/thenlper/gte-large) | 0.67 | 1024 | 512 | **63.13** | 46.84 | 85.00 | 59.13 | 52.22 | 83.35 | 31.66 | 73.33 |
2622
+ | [**gte-base**](https://huggingface.co/thenlper/gte-base) | 0.22 | 768 | 512 | **62.39** | 46.2 | 84.57 | 58.61 | 51.14 | 82.3 | 31.17 | 73.01 |
2623
+ | [e5-large-v2](https://huggingface.co/intfloat/e5-large-v2) | 1.34 | 1024| 512 | 62.25 | 44.49 | 86.03 | 56.61 | 50.56 | 82.05 | 30.19 | 75.24 |
2624
+ | [e5-base-v2](https://huggingface.co/intfloat/e5-base-v2) | 0.44 | 768 | 512 | 61.5 | 43.80 | 85.73 | 55.91 | 50.29 | 81.05 | 30.28 | 73.84 |
2625
+ | [**gte-small**](https://huggingface.co/thenlper/gte-small) | 0.07 | 384 | 512 | **61.36** | 44.89 | 83.54 | 57.7 | 49.46 | 82.07 | 30.42 | 72.31 |
2626
+ | [text-embedding-ada-002](https://platform.openai.com/docs/guides/embeddings) | - | 1536 | 8192 | 60.99 | 45.9 | 84.89 | 56.32 | 49.25 | 80.97 | 30.8 | 70.93 |
2627
+ | [e5-small-v2](https://huggingface.co/intfloat/e5-base-v2) | 0.13 | 384 | 512 | 59.93 | 39.92 | 84.67 | 54.32 | 49.04 | 80.39 | 31.16 | 72.94 |
2628
+ | [sentence-t5-xxl](https://huggingface.co/sentence-transformers/sentence-t5-xxl) | 9.73 | 768 | 512 | 59.51 | 43.72 | 85.06 | 56.42 | 42.24 | 82.63 | 30.08 | 73.42 |
2629
+ | [all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) | 0.44 | 768 | 514 | 57.78 | 43.69 | 83.04 | 59.36 | 43.81 | 80.28 | 27.49 | 65.07 |
2630
+ | [sgpt-bloom-7b1-msmarco](https://huggingface.co/bigscience/sgpt-bloom-7b1-msmarco) | 28.27 | 4096 | 2048 | 57.59 | 38.93 | 81.9 | 55.65 | 48.22 | 77.74 | 33.6 | 66.19 |
2631
+ | [all-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2) | 0.13 | 384 | 512 | 56.53 | 41.81 | 82.41 | 58.44 | 42.69 | 79.8 | 27.9 | 63.21 |
2632
+ | [all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | 0.09 | 384 | 512 | 56.26 | 42.35 | 82.37 | 58.04 | 41.95 | 78.9 | 30.81 | 63.05 |
2633
+ | [contriever-base-msmarco](https://huggingface.co/nthakur/contriever-base-msmarco) | 0.44 | 768 | 512 | 56.00 | 41.1 | 82.54 | 53.14 | 41.88 | 76.51 | 30.36 | 66.68 |
2634
+ | [sentence-t5-base](https://huggingface.co/sentence-transformers/sentence-t5-base) | 0.22 | 768 | 512 | 55.27 | 40.21 | 85.18 | 53.09 | 33.63 | 81.14 | 31.39 | 69.81 |
2635
+
2636
+
2637
+ ## Usage
2638
+
2639
+ Code example
2640
+
2641
+ ```python
2642
+ import torch.nn.functional as F
2643
+ from torch import Tensor
2644
+ from transformers import AutoTokenizer, AutoModel
2645
+
2646
+ def average_pool(last_hidden_states: Tensor,
2647
+ attention_mask: Tensor) -> Tensor:
2648
+ last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0)
2649
+ return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
2650
+
2651
+ input_texts = [
2652
+ "what is the capital of China?",
2653
+ "how to implement quick sort in python?",
2654
+ "Beijing",
2655
+ "sorting algorithms"
2656
+ ]
2657
+
2658
+ tokenizer = AutoTokenizer.from_pretrained("thenlper/gte-small")
2659
+ model = AutoModel.from_pretrained("thenlper/gte-small")
2660
+
2661
+ # Tokenize the input texts
2662
+ batch_dict = tokenizer(input_texts, max_length=512, padding=True, truncation=True, return_tensors='pt')
2663
+
2664
+ outputs = model(**batch_dict)
2665
+ embeddings = average_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
2666
+
2667
+ # (Optionally) normalize embeddings
2668
+ embeddings = F.normalize(embeddings, p=2, dim=1)
2669
+ scores = (embeddings[:1] @ embeddings[1:].T) * 100
2670
+ print(scores.tolist())
2671
+ ```
2672
+
2673
+ Use with sentence-transformers:
2674
+ ```python
2675
+ from sentence_transformers import SentenceTransformer
2676
+ from sentence_transformers.util import cos_sim
2677
+
2678
+ sentences = ['That is a happy person', 'That is a very happy person']
2679
+
2680
+ model = SentenceTransformer('thenlper/gte-large')
2681
+ embeddings = model.encode(sentences)
2682
+ print(cos_sim(embeddings[0], embeddings[1]))
2683
+ ```
2684
+
2685
+ ### Limitation
2686
+
2687
+ This model exclusively caters to English texts, and any lengthy texts will be truncated to a maximum of 512 tokens.
2688
+
2689
+ ### Citation
2690
+
2691
+ If you find our paper or models helpful, please consider citing them as follows:
2692
+
2693
+ ```
2694
+ @misc{li2023general,
2695
+ title={Towards General Text Embeddings with Multi-stage Contrastive Learning},
2696
+ author={Zehan Li and Xin Zhang and Yanzhao Zhang and Dingkun Long and Pengjun Xie and Meishan Zhang},
2697
+ year={2023},
2698
+ eprint={2308.03281},
2699
+ archivePrefix={arXiv},
2700
+ primaryClass={cs.CL}
2701
+ }
2702
+ ```
snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/jupyter-wb536061/.cache/torch/sentence_transformers/thenlper_gte-small/",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 384,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 1536,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.36.2",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/config_sentence_transformers.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "2.2.2",
4
+ "transformers": "4.36.2",
5
+ "pytorch": "2.1.2+cu121"
6
+ }
7
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f66b400a271b4c1b0fa9a9979fff09ff9530685e64384e7e0a92fbfef8d3b59e
3
+ size 133462128
snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/tokenizer_config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "max_length": 128,
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "never_split": null,
52
+ "pad_to_multiple_of": null,
53
+ "pad_token": "[PAD]",
54
+ "pad_token_type_id": 0,
55
+ "padding_side": "right",
56
+ "sep_token": "[SEP]",
57
+ "stride": 0,
58
+ "strip_accents": null,
59
+ "tokenize_chinese_chars": true,
60
+ "tokenizer_class": "BertTokenizer",
61
+ "truncation_side": "right",
62
+ "truncation_strategy": "longest_first",
63
+ "unk_token": "[UNK]"
64
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_02_thenlper_gte-small/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/1_Pooling/config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false
7
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/README.md ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ pipeline_tag: sentence-similarity
3
+ tags:
4
+ - sentence-transformers
5
+ - feature-extraction
6
+ - sentence-similarity
7
+ language: en
8
+ license: apache-2.0
9
+ datasets:
10
+ - s2orc
11
+ - flax-sentence-embeddings/stackexchange_xml
12
+ - ms_marco
13
+ - gooaq
14
+ - yahoo_answers_topics
15
+ - code_search_net
16
+ - search_qa
17
+ - eli5
18
+ - snli
19
+ - multi_nli
20
+ - wikihow
21
+ - natural_questions
22
+ - trivia_qa
23
+ - embedding-data/sentence-compression
24
+ - embedding-data/flickr30k-captions
25
+ - embedding-data/altlex
26
+ - embedding-data/simple-wiki
27
+ - embedding-data/QQP
28
+ - embedding-data/SPECTER
29
+ - embedding-data/PAQ_pairs
30
+ - embedding-data/WikiAnswers
31
+
32
+ ---
33
+
34
+
35
+ # all-MiniLM-L6-v2
36
+ This is a [sentence-transformers](https://www.SBERT.net) model: It maps sentences & paragraphs to a 384 dimensional dense vector space and can be used for tasks like clustering or semantic search.
37
+
38
+ ## Usage (Sentence-Transformers)
39
+ Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
40
+
41
+ ```
42
+ pip install -U sentence-transformers
43
+ ```
44
+
45
+ Then you can use the model like this:
46
+ ```python
47
+ from sentence_transformers import SentenceTransformer
48
+ sentences = ["This is an example sentence", "Each sentence is converted"]
49
+
50
+ model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
51
+ embeddings = model.encode(sentences)
52
+ print(embeddings)
53
+ ```
54
+
55
+ ## Usage (HuggingFace Transformers)
56
+ Without [sentence-transformers](https://www.SBERT.net), you can use the model like this: First, you pass your input through the transformer model, then you have to apply the right pooling-operation on-top of the contextualized word embeddings.
57
+
58
+ ```python
59
+ from transformers import AutoTokenizer, AutoModel
60
+ import torch
61
+ import torch.nn.functional as F
62
+
63
+ #Mean Pooling - Take attention mask into account for correct averaging
64
+ def mean_pooling(model_output, attention_mask):
65
+ token_embeddings = model_output[0] #First element of model_output contains all token embeddings
66
+ input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
67
+ return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
68
+
69
+
70
+ # Sentences we want sentence embeddings for
71
+ sentences = ['This is an example sentence', 'Each sentence is converted']
72
+
73
+ # Load model from HuggingFace Hub
74
+ tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
75
+ model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
76
+
77
+ # Tokenize sentences
78
+ encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
79
+
80
+ # Compute token embeddings
81
+ with torch.no_grad():
82
+ model_output = model(**encoded_input)
83
+
84
+ # Perform pooling
85
+ sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
86
+
87
+ # Normalize embeddings
88
+ sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
89
+
90
+ print("Sentence embeddings:")
91
+ print(sentence_embeddings)
92
+ ```
93
+
94
+ ## Evaluation Results
95
+
96
+ For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name=sentence-transformers/all-MiniLM-L6-v2)
97
+
98
+ ------
99
+
100
+ ## Background
101
+
102
+ The project aims to train sentence embedding models on very large sentence level datasets using a self-supervised
103
+ contrastive learning objective. We used the pretrained [`nreimers/MiniLM-L6-H384-uncased`](https://huggingface.co/nreimers/MiniLM-L6-H384-uncased) model and fine-tuned in on a
104
+ 1B sentence pairs dataset. We use a contrastive learning objective: given a sentence from the pair, the model should predict which out of a set of randomly sampled other sentences, was actually paired with it in our dataset.
105
+
106
+ We developped this model during the
107
+ [Community week using JAX/Flax for NLP & CV](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/7104),
108
+ organized by Hugging Face. We developped this model as part of the project:
109
+ [Train the Best Sentence Embedding Model Ever with 1B Training Pairs](https://discuss.huggingface.co/t/train-the-best-sentence-embedding-model-ever-with-1b-training-pairs/7354). We benefited from efficient hardware infrastructure to run the project: 7 TPUs v3-8, as well as intervention from Googles Flax, JAX, and Cloud team member about efficient deep learning frameworks.
110
+
111
+ ## Intended uses
112
+
113
+ Our model is intented to be used as a sentence and short paragraph encoder. Given an input text, it ouptuts a vector which captures
114
+ the semantic information. The sentence vector may be used for information retrieval, clustering or sentence similarity tasks.
115
+
116
+ By default, input text longer than 256 word pieces is truncated.
117
+
118
+
119
+ ## Training procedure
120
+
121
+ ### Pre-training
122
+
123
+ We use the pretrained [`nreimers/MiniLM-L6-H384-uncased`](https://huggingface.co/nreimers/MiniLM-L6-H384-uncased) model. Please refer to the model card for more detailed information about the pre-training procedure.
124
+
125
+ ### Fine-tuning
126
+
127
+ We fine-tune the model using a contrastive objective. Formally, we compute the cosine similarity from each possible sentence pairs from the batch.
128
+ We then apply the cross entropy loss by comparing with true pairs.
129
+
130
+ #### Hyper parameters
131
+
132
+ We trained ou model on a TPU v3-8. We train the model during 100k steps using a batch size of 1024 (128 per TPU core).
133
+ We use a learning rate warm up of 500. The sequence length was limited to 128 tokens. We used the AdamW optimizer with
134
+ a 2e-5 learning rate. The full training script is accessible in this current repository: `train_script.py`.
135
+
136
+ #### Training data
137
+
138
+ We use the concatenation from multiple datasets to fine-tune our model. The total number of sentence pairs is above 1 billion sentences.
139
+ We sampled each dataset given a weighted probability which configuration is detailed in the `data_config.json` file.
140
+
141
+
142
+ | Dataset | Paper | Number of training tuples |
143
+ |--------------------------------------------------------|:----------------------------------------:|:--------------------------:|
144
+ | [Reddit comments (2015-2018)](https://github.com/PolyAI-LDN/conversational-datasets/tree/master/reddit) | [paper](https://arxiv.org/abs/1904.06472) | 726,484,430 |
145
+ | [S2ORC](https://github.com/allenai/s2orc) Citation pairs (Abstracts) | [paper](https://aclanthology.org/2020.acl-main.447/) | 116,288,806 |
146
+ | [WikiAnswers](https://github.com/afader/oqa#wikianswers-corpus) Duplicate question pairs | [paper](https://doi.org/10.1145/2623330.2623677) | 77,427,422 |
147
+ | [PAQ](https://github.com/facebookresearch/PAQ) (Question, Answer) pairs | [paper](https://arxiv.org/abs/2102.07033) | 64,371,441 |
148
+ | [S2ORC](https://github.com/allenai/s2orc) Citation pairs (Titles) | [paper](https://aclanthology.org/2020.acl-main.447/) | 52,603,982 |
149
+ | [S2ORC](https://github.com/allenai/s2orc) (Title, Abstract) | [paper](https://aclanthology.org/2020.acl-main.447/) | 41,769,185 |
150
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title, Body) pairs | - | 25,316,456 |
151
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title+Body, Answer) pairs | - | 21,396,559 |
152
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title, Answer) pairs | - | 21,396,559 |
153
+ | [MS MARCO](https://microsoft.github.io/msmarco/) triplets | [paper](https://doi.org/10.1145/3404835.3462804) | 9,144,553 |
154
+ | [GOOAQ: Open Question Answering with Diverse Answer Types](https://github.com/allenai/gooaq) | [paper](https://arxiv.org/pdf/2104.08727.pdf) | 3,012,496 |
155
+ | [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Title, Answer) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 1,198,260 |
156
+ | [Code Search](https://huggingface.co/datasets/code_search_net) | - | 1,151,414 |
157
+ | [COCO](https://cocodataset.org/#home) Image captions | [paper](https://link.springer.com/chapter/10.1007%2F978-3-319-10602-1_48) | 828,395|
158
+ | [SPECTER](https://github.com/allenai/specter) citation triplets | [paper](https://doi.org/10.18653/v1/2020.acl-main.207) | 684,100 |
159
+ | [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Question, Answer) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 681,164 |
160
+ | [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Title, Question) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 659,896 |
161
+ | [SearchQA](https://huggingface.co/datasets/search_qa) | [paper](https://arxiv.org/abs/1704.05179) | 582,261 |
162
+ | [Eli5](https://huggingface.co/datasets/eli5) | [paper](https://doi.org/10.18653/v1/p19-1346) | 325,475 |
163
+ | [Flickr 30k](https://shannon.cs.illinois.edu/DenotationGraph/) | [paper](https://transacl.org/ojs/index.php/tacl/article/view/229/33) | 317,695 |
164
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (titles) | | 304,525 |
165
+ | AllNLI ([SNLI](https://nlp.stanford.edu/projects/snli/) and [MultiNLI](https://cims.nyu.edu/~sbowman/multinli/) | [paper SNLI](https://doi.org/10.18653/v1/d15-1075), [paper MultiNLI](https://doi.org/10.18653/v1/n18-1101) | 277,230 |
166
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (bodies) | | 250,519 |
167
+ | [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (titles+bodies) | | 250,460 |
168
+ | [Sentence Compression](https://github.com/google-research-datasets/sentence-compression) | [paper](https://www.aclweb.org/anthology/D13-1155/) | 180,000 |
169
+ | [Wikihow](https://github.com/pvl/wikihow_pairs_dataset) | [paper](https://arxiv.org/abs/1810.09305) | 128,542 |
170
+ | [Altlex](https://github.com/chridey/altlex/) | [paper](https://aclanthology.org/P16-1135.pdf) | 112,696 |
171
+ | [Quora Question Triplets](https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs) | - | 103,663 |
172
+ | [Simple Wikipedia](https://cs.pomona.edu/~dkauchak/simplification/) | [paper](https://www.aclweb.org/anthology/P11-2117/) | 102,225 |
173
+ | [Natural Questions (NQ)](https://ai.google.com/research/NaturalQuestions) | [paper](https://transacl.org/ojs/index.php/tacl/article/view/1455) | 100,231 |
174
+ | [SQuAD2.0](https://rajpurkar.github.io/SQuAD-explorer/) | [paper](https://aclanthology.org/P18-2124.pdf) | 87,599 |
175
+ | [TriviaQA](https://huggingface.co/datasets/trivia_qa) | - | 73,346 |
176
+ | **Total** | | **1,170,060,424** |
snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/jupyter-wb536061/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-L6-v2/",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 384,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 1536,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 6,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.36.2",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30522
26
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/config_sentence_transformers.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "2.0.0",
4
+ "transformers": "4.6.1",
5
+ "pytorch": "1.8.1"
6
+ }
7
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a68938edba4e8d6c4e0cfd4507833d703a26b63425933405ba59468dd5264db7
3
+ size 90864192
snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 256,
3
+ "do_lower_case": false
4
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
snapshot/best/2_MixtureEmbeddingsModel/expert_03_sentence-transformers_all-MiniLM-L6-v2/tokenizer_config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "max_length": 128,
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_to_multiple_of": null,
53
+ "pad_token": "[PAD]",
54
+ "pad_token_type_id": 0,
55
+ "padding_side": "right",
56
+ "sep_token": "[SEP]",
57
+ "stride": 0,
58
+ "strip_accents": null,
59
+ "tokenize_chinese_chars": true,
60
+ "tokenizer_class": "BertTokenizer",
61
+ "truncation_side": "right",
62
+ "truncation_strategy": "longest_first",
63
+ "unk_token": "[UNK]"
64
+ }