Spaces:

mteb
/

leaderboard

Running on CPU Upgrade

App Files Community

175

Tom Aarsen committed on Apr 8, 2024

Commit

0ebd4b8

2 Parent(s): 5bd316f 97c35aa

Merge branch 'main' into model_size_parameters

Browse files

Files changed (2) hide show

EXTERNAL_MODEL_RESULTS.json +0 -0
app.py +7 -1

EXTERNAL_MODEL_RESULTS.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

app.py CHANGED Viewed

@@ -442,6 +442,7 @@ EXTERNAL_MODELS = [
     "use-cmlm-multilingual",
     "voyage-2",
     "voyage-code-2",
     "voyage-lite-01-instruct",
     "voyage-lite-02-instruct",
     "xlm-roberta-base",
@@ -565,6 +566,7 @@ EXTERNAL_MODEL_TO_LINK = {
     "use-cmlm-multilingual": "https://huggingface.co/sentence-transformers/use-cmlm-multilingual",
     "voyage-2": "https://docs.voyageai.com/embeddings/",
     "voyage-code-2": "https://docs.voyageai.com/embeddings/",
     "voyage-lite-01-instruct": "https://docs.voyageai.com/embeddings/",
     "voyage-lite-02-instruct": "https://docs.voyageai.com/embeddings/",
     "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base",
@@ -687,6 +689,7 @@ EXTERNAL_MODEL_TO_DIM = {
     "use-cmlm-multilingual": 768,
     "voyage-2": 1024,
     "voyage-code-2": 1536,
     "voyage-lite-01-instruct": 1024,
     "voyage-lite-02-instruct": 1024,
     "xlm-roberta-base":  768,
@@ -809,6 +812,7 @@ EXTERNAL_MODEL_TO_SEQLEN = {
     "unsup-simcse-bert-base-uncased": 512,
     "voyage-2": 1024,
     "voyage-code-2": 16000,
     "voyage-lite-01-instruct": 4000,
     "voyage-lite-02-instruct": 4000,
     "xlm-roberta-base": 514,
@@ -901,6 +905,7 @@ EXTERNAL_MODEL_TO_SIZE = {
     "text2vec-large-chinese": 326,
     "unsup-simcse-bert-base-uncased": 110,
     "use-cmlm-multilingual": 472,
     "voyage-lite-02-instruct": 1220,
     "xlm-roberta-base": 279,
     "xlm-roberta-large": 560,
@@ -930,6 +935,7 @@ PROPRIETARY_MODELS = {
     "titan-embed-text-v1",
     "voyage-2",
     "voyage-code-2",
     "voyage-lite-01-instruct",
     "voyage-lite-02-instruct",
     "google-gecko.text-embedding-preview-0409",
@@ -2094,7 +2100,7 @@ with gr.Blocks(css=css) as block:
     language_per_task = gr.JSON(value=dict(), visible=False)
     gr.Markdown(f"""
-    Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the <a href="https://github.com/embeddings-benchmark/mteb#leaderboard" target="_blank" style="text-decoration: underline">MTEB GitHub repository</a> 🤗 Refer to the [MTEB paper](https://arxiv.org/abs/2210.07316) for details on metrics, tasks and models.
     """)
     with gr.Row():

     "use-cmlm-multilingual",
     "voyage-2",
     "voyage-code-2",
+    "voyage-law-2",
     "voyage-lite-01-instruct",
     "voyage-lite-02-instruct",
     "xlm-roberta-base",
     "use-cmlm-multilingual": "https://huggingface.co/sentence-transformers/use-cmlm-multilingual",
     "voyage-2": "https://docs.voyageai.com/embeddings/",
     "voyage-code-2": "https://docs.voyageai.com/embeddings/",
+    "voyage-law-2": "https://docs.voyageai.com/embeddings/",
     "voyage-lite-01-instruct": "https://docs.voyageai.com/embeddings/",
     "voyage-lite-02-instruct": "https://docs.voyageai.com/embeddings/",
     "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base",
     "use-cmlm-multilingual": 768,
     "voyage-2": 1024,
     "voyage-code-2": 1536,
+    "voyage-law-2": 1024,
     "voyage-lite-01-instruct": 1024,
     "voyage-lite-02-instruct": 1024,
     "xlm-roberta-base":  768,
     "unsup-simcse-bert-base-uncased": 512,
     "voyage-2": 1024,
     "voyage-code-2": 16000,
+    "voyage-law-2": 4000,
     "voyage-lite-01-instruct": 4000,
     "voyage-lite-02-instruct": 4000,
     "xlm-roberta-base": 514,
     "text2vec-large-chinese": 326,
     "unsup-simcse-bert-base-uncased": 110,
     "use-cmlm-multilingual": 472,
+    "voyage-law-2": 1220,
     "voyage-lite-02-instruct": 1220,
     "xlm-roberta-base": 279,
     "xlm-roberta-large": 560,
     "titan-embed-text-v1",
     "voyage-2",
     "voyage-code-2",
+    "voyage-law-2",
     "voyage-lite-01-instruct",
     "voyage-lite-02-instruct",
     "google-gecko.text-embedding-preview-0409",
     language_per_task = gr.JSON(value=dict(), visible=False)
     gr.Markdown(f"""
+    Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the <a href="https://github.com/embeddings-benchmark/mteb/blob/main/docs/adding_a_model.md" target="_blank" style="text-decoration: underline">MTEB GitHub repository</a> 🤗 Refer to the [MTEB paper](https://arxiv.org/abs/2210.07316) for details on metrics, tasks and models.
     """)
     with gr.Row():