Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Merge branch 'main' into model_size_parameters
Browse files- EXTERNAL_MODEL_RESULTS.json +0 -0
- app.py +7 -1
EXTERNAL_MODEL_RESULTS.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
app.py
CHANGED
|
@@ -442,6 +442,7 @@ EXTERNAL_MODELS = [
|
|
| 442 |
"use-cmlm-multilingual",
|
| 443 |
"voyage-2",
|
| 444 |
"voyage-code-2",
|
|
|
|
| 445 |
"voyage-lite-01-instruct",
|
| 446 |
"voyage-lite-02-instruct",
|
| 447 |
"xlm-roberta-base",
|
|
@@ -565,6 +566,7 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
| 565 |
"use-cmlm-multilingual": "https://huggingface.co/sentence-transformers/use-cmlm-multilingual",
|
| 566 |
"voyage-2": "https://docs.voyageai.com/embeddings/",
|
| 567 |
"voyage-code-2": "https://docs.voyageai.com/embeddings/",
|
|
|
|
| 568 |
"voyage-lite-01-instruct": "https://docs.voyageai.com/embeddings/",
|
| 569 |
"voyage-lite-02-instruct": "https://docs.voyageai.com/embeddings/",
|
| 570 |
"xlm-roberta-base": "https://huggingface.co/xlm-roberta-base",
|
|
@@ -687,6 +689,7 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
| 687 |
"use-cmlm-multilingual": 768,
|
| 688 |
"voyage-2": 1024,
|
| 689 |
"voyage-code-2": 1536,
|
|
|
|
| 690 |
"voyage-lite-01-instruct": 1024,
|
| 691 |
"voyage-lite-02-instruct": 1024,
|
| 692 |
"xlm-roberta-base": 768,
|
|
@@ -809,6 +812,7 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
| 809 |
"unsup-simcse-bert-base-uncased": 512,
|
| 810 |
"voyage-2": 1024,
|
| 811 |
"voyage-code-2": 16000,
|
|
|
|
| 812 |
"voyage-lite-01-instruct": 4000,
|
| 813 |
"voyage-lite-02-instruct": 4000,
|
| 814 |
"xlm-roberta-base": 514,
|
|
@@ -901,6 +905,7 @@ EXTERNAL_MODEL_TO_SIZE = {
|
|
| 901 |
"text2vec-large-chinese": 326,
|
| 902 |
"unsup-simcse-bert-base-uncased": 110,
|
| 903 |
"use-cmlm-multilingual": 472,
|
|
|
|
| 904 |
"voyage-lite-02-instruct": 1220,
|
| 905 |
"xlm-roberta-base": 279,
|
| 906 |
"xlm-roberta-large": 560,
|
|
@@ -930,6 +935,7 @@ PROPRIETARY_MODELS = {
|
|
| 930 |
"titan-embed-text-v1",
|
| 931 |
"voyage-2",
|
| 932 |
"voyage-code-2",
|
|
|
|
| 933 |
"voyage-lite-01-instruct",
|
| 934 |
"voyage-lite-02-instruct",
|
| 935 |
"google-gecko.text-embedding-preview-0409",
|
|
@@ -2094,7 +2100,7 @@ with gr.Blocks(css=css) as block:
|
|
| 2094 |
language_per_task = gr.JSON(value=dict(), visible=False)
|
| 2095 |
|
| 2096 |
gr.Markdown(f"""
|
| 2097 |
-
Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the <a href="https://github.com/embeddings-benchmark/mteb
|
| 2098 |
""")
|
| 2099 |
|
| 2100 |
with gr.Row():
|
|
|
|
| 442 |
"use-cmlm-multilingual",
|
| 443 |
"voyage-2",
|
| 444 |
"voyage-code-2",
|
| 445 |
+
"voyage-law-2",
|
| 446 |
"voyage-lite-01-instruct",
|
| 447 |
"voyage-lite-02-instruct",
|
| 448 |
"xlm-roberta-base",
|
|
|
|
| 566 |
"use-cmlm-multilingual": "https://huggingface.co/sentence-transformers/use-cmlm-multilingual",
|
| 567 |
"voyage-2": "https://docs.voyageai.com/embeddings/",
|
| 568 |
"voyage-code-2": "https://docs.voyageai.com/embeddings/",
|
| 569 |
+
"voyage-law-2": "https://docs.voyageai.com/embeddings/",
|
| 570 |
"voyage-lite-01-instruct": "https://docs.voyageai.com/embeddings/",
|
| 571 |
"voyage-lite-02-instruct": "https://docs.voyageai.com/embeddings/",
|
| 572 |
"xlm-roberta-base": "https://huggingface.co/xlm-roberta-base",
|
|
|
|
| 689 |
"use-cmlm-multilingual": 768,
|
| 690 |
"voyage-2": 1024,
|
| 691 |
"voyage-code-2": 1536,
|
| 692 |
+
"voyage-law-2": 1024,
|
| 693 |
"voyage-lite-01-instruct": 1024,
|
| 694 |
"voyage-lite-02-instruct": 1024,
|
| 695 |
"xlm-roberta-base": 768,
|
|
|
|
| 812 |
"unsup-simcse-bert-base-uncased": 512,
|
| 813 |
"voyage-2": 1024,
|
| 814 |
"voyage-code-2": 16000,
|
| 815 |
+
"voyage-law-2": 4000,
|
| 816 |
"voyage-lite-01-instruct": 4000,
|
| 817 |
"voyage-lite-02-instruct": 4000,
|
| 818 |
"xlm-roberta-base": 514,
|
|
|
|
| 905 |
"text2vec-large-chinese": 326,
|
| 906 |
"unsup-simcse-bert-base-uncased": 110,
|
| 907 |
"use-cmlm-multilingual": 472,
|
| 908 |
+
"voyage-law-2": 1220,
|
| 909 |
"voyage-lite-02-instruct": 1220,
|
| 910 |
"xlm-roberta-base": 279,
|
| 911 |
"xlm-roberta-large": 560,
|
|
|
|
| 935 |
"titan-embed-text-v1",
|
| 936 |
"voyage-2",
|
| 937 |
"voyage-code-2",
|
| 938 |
+
"voyage-law-2",
|
| 939 |
"voyage-lite-01-instruct",
|
| 940 |
"voyage-lite-02-instruct",
|
| 941 |
"google-gecko.text-embedding-preview-0409",
|
|
|
|
| 2100 |
language_per_task = gr.JSON(value=dict(), visible=False)
|
| 2101 |
|
| 2102 |
gr.Markdown(f"""
|
| 2103 |
+
Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the <a href="https://github.com/embeddings-benchmark/mteb/blob/main/docs/adding_a_model.md" target="_blank" style="text-decoration: underline">MTEB GitHub repository</a> 🤗 Refer to the [MTEB paper](https://arxiv.org/abs/2210.07316) for details on metrics, tasks and models.
|
| 2104 |
""")
|
| 2105 |
|
| 2106 |
with gr.Row():
|