Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Tom Aarsen
committed on
Commit
·
bd6a61b
1
Parent(s):
5c90ee9
Compute model size based on number of parameters
Browse files- app.py +100 -126
- utils/__init__.py +0 -0
- utils/model_size.py +39 -0
app.py
CHANGED
|
@@ -4,11 +4,13 @@ import os
|
|
| 4 |
|
| 5 |
from datasets import load_dataset
|
| 6 |
import gradio as gr
|
| 7 |
-
from huggingface_hub import
|
| 8 |
from huggingface_hub.repocard import metadata_load
|
| 9 |
import pandas as pd
|
| 10 |
from tqdm.autonotebook import tqdm
|
| 11 |
|
|
|
|
|
|
|
| 12 |
TASKS = [
|
| 13 |
"BitextMining",
|
| 14 |
"Classification",
|
|
@@ -786,94 +788,94 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
| 786 |
}
|
| 787 |
|
| 788 |
EXTERNAL_MODEL_TO_SIZE = {
|
| 789 |
-
"allenai-specter":
|
| 790 |
-
"all-MiniLM-L12-v2":
|
| 791 |
-
"all-MiniLM-L6-v2":
|
| 792 |
-
"all-mpnet-base-v2":
|
| 793 |
-
"bert-base-10lang-cased":
|
| 794 |
-
"bert-base-15lang-cased":
|
| 795 |
-
"bert-base-25lang-cased":
|
| 796 |
-
"bert-base-multilingual-cased":
|
| 797 |
-
"bert-base-multilingual-uncased":
|
| 798 |
-
"bert-base-uncased":
|
| 799 |
-
"bert-base-swedish-cased":
|
| 800 |
-
"bge-base-zh-v1.5":
|
| 801 |
-
"bge-large-zh-v1.5":
|
| 802 |
-
"bge-large-zh-noinstruct":
|
| 803 |
-
"bge-small-zh-v1.5":
|
| 804 |
-
"camembert-base":
|
| 805 |
-
"camembert-large":
|
| 806 |
-
"cross-en-de-roberta-sentence-transformer":
|
| 807 |
-
"contriever-base-msmarco":
|
| 808 |
-
"distilbert-base-25lang-cased":
|
| 809 |
-
"distilbert-base-en-fr-cased":
|
| 810 |
-
"distilbert-base-en-fr-es-pt-it-cased":
|
| 811 |
-
"distilbert-base-fr-cased":
|
| 812 |
-
"distilbert-base-uncased":
|
| 813 |
-
"DanskBERT":
|
| 814 |
-
"distiluse-base-multilingual-cased-v2":
|
| 815 |
-
"dfm-encoder-large-v1":
|
| 816 |
-
"dfm-sentence-encoder-large-1":
|
| 817 |
-
"e5-base":
|
| 818 |
-
"e5-large":
|
| 819 |
-
"e5-mistral-7b-instruct":
|
| 820 |
-
"e5-small":
|
| 821 |
-
"electra-small-nordic":
|
| 822 |
-
"electra-small-swedish-cased-discriminator":
|
| 823 |
-
"flaubert_base_cased":
|
| 824 |
-
"flaubert_base_uncased":
|
| 825 |
-
"flaubert_large_cased":
|
| 826 |
-
"gbert-base":
|
| 827 |
-
"gbert-large":
|
| 828 |
-
"gelectra-base":
|
| 829 |
-
"gelectra-large":
|
| 830 |
-
"glove.6B.300d":
|
| 831 |
-
"gottbert-base":
|
| 832 |
-
"gtr-t5-base":
|
| 833 |
-
"gtr-t5-large":
|
| 834 |
-
"gtr-t5-xl":
|
| 835 |
-
"gtr-t5-xxl":
|
| 836 |
-
"herbert-base-retrieval-v2":
|
| 837 |
-
"komninos":
|
| 838 |
-
"luotuo-bert-medium":
|
| 839 |
-
"LASER2":
|
| 840 |
-
"LaBSE":
|
| 841 |
-
"m3e-base":
|
| 842 |
-
"m3e-large":
|
| 843 |
-
"msmarco-bert-co-condensor":
|
| 844 |
-
"multi-qa-MiniLM-L6-cos-v1":
|
| 845 |
-
"multilingual-e5-base":
|
| 846 |
-
"multilingual-e5-small":
|
| 847 |
-
"multilingual-e5-large":
|
| 848 |
-
"nb-bert-base":
|
| 849 |
-
"nb-bert-large":
|
| 850 |
-
"nomic-embed-text-v1.5-64":
|
| 851 |
-
"nomic-embed-text-v1.5-128":
|
| 852 |
-
"nomic-embed-text-v1.5-256":
|
| 853 |
-
"nomic-embed-text-v1.5-512":
|
| 854 |
-
"norbert3-base":
|
| 855 |
-
"norbert3-large":
|
| 856 |
-
"paraphrase-multilingual-mpnet-base-v2":
|
| 857 |
-
"paraphrase-multilingual-MiniLM-L12-v2":
|
| 858 |
-
"sentence-camembert-base":
|
| 859 |
-
"sentence-camembert-large":
|
| 860 |
-
"sentence-croissant-llm-base":
|
| 861 |
-
"sentence-bert-swedish-cased":
|
| 862 |
-
"sentence-t5-base":
|
| 863 |
-
"sentence-t5-large":
|
| 864 |
-
"sentence-t5-xl":
|
| 865 |
-
"sentence-t5-xxl":
|
| 866 |
-
"silver-retriever-base-v1":
|
| 867 |
-
"sup-simcse-bert-base-uncased":
|
| 868 |
-
"st-polish-paraphrase-from-distilroberta":
|
| 869 |
-
"st-polish-paraphrase-from-mpnet":
|
| 870 |
-
"text2vec-base-chinese":
|
| 871 |
-
"text2vec-large-chinese":
|
| 872 |
-
"unsup-simcse-bert-base-uncased":
|
| 873 |
-
"use-cmlm-multilingual":
|
| 874 |
-
"voyage-lite-02-instruct":
|
| 875 |
-
"xlm-roberta-base":
|
| 876 |
-
"xlm-roberta-large":
|
| 877 |
}
|
| 878 |
|
| 879 |
MODELS_TO_SKIP = {
|
|
@@ -997,6 +999,7 @@ MODELS_TO_SKIP = {
|
|
| 997 |
"beademiguelperez/sentence-transformers-multilingual-e5-small",
|
| 998 |
"arcdev/SFR-Embedding-Mistral",
|
| 999 |
"arcdev/e5-mistral-7b-instruct",
|
|
|
|
| 1000 |
}
|
| 1001 |
|
| 1002 |
def add_lang(examples):
|
|
@@ -1079,36 +1082,7 @@ def get_dim_seq_size(model):
|
|
| 1079 |
dim = config.get("hidden_dim", config.get("hidden_size", config.get("d_model", "")))
|
| 1080 |
seq = config.get("n_positions", config.get("max_position_embeddings", config.get("n_ctx", config.get("seq_length", ""))))
|
| 1081 |
# Get model file size without downloading
|
| 1082 |
-
|
| 1083 |
-
url = hf_hub_url(model.modelId, filename="pytorch_model.bin")
|
| 1084 |
-
meta = get_hf_file_metadata(url)
|
| 1085 |
-
size = round(meta.size / 1e9, 2)
|
| 1086 |
-
elif "pytorch_model.bin.index.json" in filenames:
|
| 1087 |
-
index_path = hf_hub_download(model.modelId, filename="pytorch_model.bin.index.json")
|
| 1088 |
-
"""
|
| 1089 |
-
{
|
| 1090 |
-
"metadata": {
|
| 1091 |
-
"total_size": 28272820224
|
| 1092 |
-
},....
|
| 1093 |
-
"""
|
| 1094 |
-
size = json.load(open(index_path))
|
| 1095 |
-
if ("metadata" in size) and ("total_size" in size["metadata"]):
|
| 1096 |
-
size = round(size["metadata"]["total_size"] / 1e9, 2)
|
| 1097 |
-
elif "model.safetensors" in filenames:
|
| 1098 |
-
url = hf_hub_url(model.modelId, filename="model.safetensors")
|
| 1099 |
-
meta = get_hf_file_metadata(url)
|
| 1100 |
-
size = round(meta.size / 1e9, 2)
|
| 1101 |
-
elif "model.safetensors.index.json" in filenames:
|
| 1102 |
-
index_path = hf_hub_download(model.modelId, filename="model.safetensors.index.json")
|
| 1103 |
-
"""
|
| 1104 |
-
{
|
| 1105 |
-
"metadata": {
|
| 1106 |
-
"total_size": 14483464192
|
| 1107 |
-
},....
|
| 1108 |
-
"""
|
| 1109 |
-
size = json.load(open(index_path))
|
| 1110 |
-
if ("metadata" in size) and ("total_size" in size["metadata"]):
|
| 1111 |
-
size = round(size["metadata"]["total_size"] / 1e9, 2)
|
| 1112 |
return dim, seq, size
|
| 1113 |
|
| 1114 |
def make_datasets_clickable(df):
|
|
@@ -1120,7 +1094,7 @@ def make_datasets_clickable(df):
|
|
| 1120 |
return df
|
| 1121 |
|
| 1122 |
def add_rank(df):
|
| 1123 |
-
cols_to_rank = [col for col in df.columns if col not in ["Model", "Model Size (
|
| 1124 |
if len(cols_to_rank) == 1:
|
| 1125 |
df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
|
| 1126 |
else:
|
|
@@ -1150,7 +1124,7 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
|
|
| 1150 |
# Model & at least one result
|
| 1151 |
if len(res) > 1:
|
| 1152 |
if add_emb_dim:
|
| 1153 |
-
res["Model Size (
|
| 1154 |
res["Embedding Dimensions"] = EXTERNAL_MODEL_TO_DIM.get(model, "")
|
| 1155 |
res["Max Tokens"] = EXTERNAL_MODEL_TO_SEQLEN.get(model, "")
|
| 1156 |
df_list.append(res)
|
|
@@ -1191,7 +1165,7 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
|
|
| 1191 |
if add_emb_dim:
|
| 1192 |
try:
|
| 1193 |
# Fails on gated repos, so we only include scores for them
|
| 1194 |
-
out["Embedding Dimensions"], out["Max Tokens"], out["Model Size (
|
| 1195 |
except:
|
| 1196 |
pass
|
| 1197 |
df_list.append(out)
|
|
@@ -1268,7 +1242,7 @@ def get_mteb_average():
|
|
| 1268 |
# Fill NaN after averaging
|
| 1269 |
DATA_OVERALL.fillna("", inplace=True)
|
| 1270 |
|
| 1271 |
-
DATA_OVERALL = DATA_OVERALL[["Rank", "Model", "Model Size (
|
| 1272 |
DATA_OVERALL = DATA_OVERALL[DATA_OVERALL.iloc[:, 5:].ne("").any(axis=1)]
|
| 1273 |
|
| 1274 |
return DATA_OVERALL
|
|
@@ -1327,7 +1301,7 @@ def get_mteb_average_zh():
|
|
| 1327 |
# Fill NaN after averaging
|
| 1328 |
DATA_OVERALL_ZH.fillna("", inplace=True)
|
| 1329 |
|
| 1330 |
-
DATA_OVERALL_ZH = DATA_OVERALL_ZH[["Rank", "Model", "Model Size (
|
| 1331 |
DATA_OVERALL_ZH = DATA_OVERALL_ZH[DATA_OVERALL_ZH.iloc[:, 5:].ne("").any(axis=1)]
|
| 1332 |
|
| 1333 |
return DATA_OVERALL_ZH
|
|
@@ -1389,7 +1363,7 @@ def get_mteb_average_fr():
|
|
| 1389 |
# Fill NaN after averaging
|
| 1390 |
DATA_OVERALL_FR.fillna("", inplace=True)
|
| 1391 |
|
| 1392 |
-
DATA_OVERALL_FR = DATA_OVERALL_FR[["Rank", "Model", "Model Size (
|
| 1393 |
DATA_OVERALL_FR = DATA_OVERALL_FR[DATA_OVERALL_FR.iloc[:, 5:].ne("").any(axis=1)]
|
| 1394 |
|
| 1395 |
return DATA_OVERALL_FR
|
|
@@ -1443,7 +1417,7 @@ def get_mteb_average_pl():
|
|
| 1443 |
# Fill NaN after averaging
|
| 1444 |
DATA_OVERALL_PL.fillna("", inplace=True)
|
| 1445 |
|
| 1446 |
-
DATA_OVERALL_PL = DATA_OVERALL_PL[["Rank", "Model", "Model Size (
|
| 1447 |
DATA_OVERALL_PL = DATA_OVERALL_PL[DATA_OVERALL_PL.iloc[:, 5:].ne("").any(axis=1)]
|
| 1448 |
|
| 1449 |
return DATA_OVERALL_PL
|
|
|
|
| 4 |
|
| 5 |
from datasets import load_dataset
|
| 6 |
import gradio as gr
|
| 7 |
+
from huggingface_hub import HfApi, hf_hub_download
|
| 8 |
from huggingface_hub.repocard import metadata_load
|
| 9 |
import pandas as pd
|
| 10 |
from tqdm.autonotebook import tqdm
|
| 11 |
|
| 12 |
+
from utils.model_size import get_model_size
|
| 13 |
+
|
| 14 |
TASKS = [
|
| 15 |
"BitextMining",
|
| 16 |
"Classification",
|
|
|
|
| 788 |
}
|
| 789 |
|
| 790 |
EXTERNAL_MODEL_TO_SIZE = {
|
| 791 |
+
"allenai-specter": 110,
|
| 792 |
+
"all-MiniLM-L12-v2": 33,
|
| 793 |
+
"all-MiniLM-L6-v2": 23,
|
| 794 |
+
"all-mpnet-base-v2": 110,
|
| 795 |
+
"bert-base-10lang-cased": 138,
|
| 796 |
+
"bert-base-15lang-cased": 138,
|
| 797 |
+
"bert-base-25lang-cased": 138,
|
| 798 |
+
"bert-base-multilingual-cased": 179,
|
| 799 |
+
"bert-base-multilingual-uncased": 168,
|
| 800 |
+
"bert-base-uncased": 110,
|
| 801 |
+
"bert-base-swedish-cased": 125,
|
| 802 |
+
"bge-base-zh-v1.5": 102,
|
| 803 |
+
"bge-large-zh-v1.5": 326,
|
| 804 |
+
"bge-large-zh-noinstruct": 326,
|
| 805 |
+
"bge-small-zh-v1.5": 24,
|
| 806 |
+
"camembert-base": 111,
|
| 807 |
+
"camembert-large": 338,
|
| 808 |
+
"cross-en-de-roberta-sentence-transformer": 278,
|
| 809 |
+
"contriever-base-msmarco": 110,
|
| 810 |
+
"distilbert-base-25lang-cased": 110,
|
| 811 |
+
"distilbert-base-en-fr-cased": 110,
|
| 812 |
+
"distilbert-base-en-fr-es-pt-it-cased": 110,
|
| 813 |
+
"distilbert-base-fr-cased": 110,
|
| 814 |
+
"distilbert-base-uncased": 110,
|
| 815 |
+
"DanskBERT": 125,
|
| 816 |
+
"distiluse-base-multilingual-cased-v2": 135,
|
| 817 |
+
"dfm-encoder-large-v1": 355,
|
| 818 |
+
"dfm-sentence-encoder-large-1": 355,
|
| 819 |
+
"e5-base": 110,
|
| 820 |
+
"e5-large": 335,
|
| 821 |
+
"e5-mistral-7b-instruct": 7110,
|
| 822 |
+
"e5-small": 33,
|
| 823 |
+
"electra-small-nordic": 23,
|
| 824 |
+
"electra-small-swedish-cased-discriminator": 16,
|
| 825 |
+
"flaubert_base_cased": 138,
|
| 826 |
+
"flaubert_base_uncased": 138,
|
| 827 |
+
"flaubert_large_cased": 372,
|
| 828 |
+
"gbert-base": 110,
|
| 829 |
+
"gbert-large": 337,
|
| 830 |
+
"gelectra-base": 110,
|
| 831 |
+
"gelectra-large": 335,
|
| 832 |
+
"glove.6B.300d": 120,
|
| 833 |
+
"gottbert-base": 127,
|
| 834 |
+
"gtr-t5-base": 110,
|
| 835 |
+
"gtr-t5-large": 168,
|
| 836 |
+
"gtr-t5-xl": 1240,
|
| 837 |
+
"gtr-t5-xxl": 4865,
|
| 838 |
+
"herbert-base-retrieval-v2": 125,
|
| 839 |
+
"komninos": 134,
|
| 840 |
+
"luotuo-bert-medium": 328,
|
| 841 |
+
"LASER2": 43,
|
| 842 |
+
"LaBSE": 471,
|
| 843 |
+
"m3e-base": 102,
|
| 844 |
+
"m3e-large": 102,
|
| 845 |
+
"msmarco-bert-co-condensor": 110,
|
| 846 |
+
"multi-qa-MiniLM-L6-cos-v1": 23,
|
| 847 |
+
"multilingual-e5-base": 278,
|
| 848 |
+
"multilingual-e5-small": 118,
|
| 849 |
+
"multilingual-e5-large": 560,
|
| 850 |
+
"nb-bert-base": 179,
|
| 851 |
+
"nb-bert-large": 355,
|
| 852 |
+
"nomic-embed-text-v1.5-64": 138,
|
| 853 |
+
"nomic-embed-text-v1.5-128": 138,
|
| 854 |
+
"nomic-embed-text-v1.5-256": 138,
|
| 855 |
+
"nomic-embed-text-v1.5-512": 138,
|
| 856 |
+
"norbert3-base": 131,
|
| 857 |
+
"norbert3-large": 368,
|
| 858 |
+
"paraphrase-multilingual-mpnet-base-v2": 278,
|
| 859 |
+
"paraphrase-multilingual-MiniLM-L12-v2": 118,
|
| 860 |
+
"sentence-camembert-base": 110,
|
| 861 |
+
"sentence-camembert-large": 337,
|
| 862 |
+
"sentence-croissant-llm-base": 1280,
|
| 863 |
+
"sentence-bert-swedish-cased": 125,
|
| 864 |
+
"sentence-t5-base": 110,
|
| 865 |
+
"sentence-t5-large": 168,
|
| 866 |
+
"sentence-t5-xl": 1240,
|
| 867 |
+
"sentence-t5-xxl": 4865,
|
| 868 |
+
"silver-retriever-base-v1": 125,
|
| 869 |
+
"sup-simcse-bert-base-uncased": 110,
|
| 870 |
+
"st-polish-paraphrase-from-distilroberta": 125,
|
| 871 |
+
"st-polish-paraphrase-from-mpnet": 125,
|
| 872 |
+
"text2vec-base-chinese": 102,
|
| 873 |
+
"text2vec-large-chinese": 326,
|
| 874 |
+
"unsup-simcse-bert-base-uncased": 110,
|
| 875 |
+
"use-cmlm-multilingual": 472,
|
| 876 |
+
"voyage-lite-02-instruct": 613,
|
| 877 |
+
"xlm-roberta-base": 279,
|
| 878 |
+
"xlm-roberta-large": 560,
|
| 879 |
}
|
| 880 |
|
| 881 |
MODELS_TO_SKIP = {
|
|
|
|
| 999 |
"beademiguelperez/sentence-transformers-multilingual-e5-small",
|
| 1000 |
"arcdev/SFR-Embedding-Mistral",
|
| 1001 |
"arcdev/e5-mistral-7b-instruct",
|
| 1002 |
+
"Koat/gte-tiny",
|
| 1003 |
}
|
| 1004 |
|
| 1005 |
def add_lang(examples):
|
|
|
|
| 1082 |
dim = config.get("hidden_dim", config.get("hidden_size", config.get("d_model", "")))
|
| 1083 |
seq = config.get("n_positions", config.get("max_position_embeddings", config.get("n_ctx", config.get("seq_length", ""))))
|
| 1084 |
# Get model file size without downloading
|
| 1085 |
+
size = get_model_size(model)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1086 |
return dim, seq, size
|
| 1087 |
|
| 1088 |
def make_datasets_clickable(df):
|
|
|
|
| 1094 |
return df
|
| 1095 |
|
| 1096 |
def add_rank(df):
|
| 1097 |
+
cols_to_rank = [col for col in df.columns if col not in ["Model", "Model Size (Million Parameters)", "Embedding Dimensions", "Max Tokens"]]
|
| 1098 |
if len(cols_to_rank) == 1:
|
| 1099 |
df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
|
| 1100 |
else:
|
|
|
|
| 1124 |
# Model & at least one result
|
| 1125 |
if len(res) > 1:
|
| 1126 |
if add_emb_dim:
|
| 1127 |
+
res["Model Size (Million Parameters)"] = EXTERNAL_MODEL_TO_SIZE.get(model, "")
|
| 1128 |
res["Embedding Dimensions"] = EXTERNAL_MODEL_TO_DIM.get(model, "")
|
| 1129 |
res["Max Tokens"] = EXTERNAL_MODEL_TO_SEQLEN.get(model, "")
|
| 1130 |
df_list.append(res)
|
|
|
|
| 1165 |
if add_emb_dim:
|
| 1166 |
try:
|
| 1167 |
# Fails on gated repos, so we only include scores for them
|
| 1168 |
+
out["Embedding Dimensions"], out["Max Tokens"], out["Model Size (Million Parameters)"] = get_dim_seq_size(model)
|
| 1169 |
except:
|
| 1170 |
pass
|
| 1171 |
df_list.append(out)
|
|
|
|
| 1242 |
# Fill NaN after averaging
|
| 1243 |
DATA_OVERALL.fillna("", inplace=True)
|
| 1244 |
|
| 1245 |
+
DATA_OVERALL = DATA_OVERALL[["Rank", "Model", "Model Size (Million Parameters)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_EN)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL)} datasets)", f"STS Average ({len(TASK_LIST_STS)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION)} dataset)"]]
|
| 1246 |
DATA_OVERALL = DATA_OVERALL[DATA_OVERALL.iloc[:, 5:].ne("").any(axis=1)]
|
| 1247 |
|
| 1248 |
return DATA_OVERALL
|
|
|
|
| 1301 |
# Fill NaN after averaging
|
| 1302 |
DATA_OVERALL_ZH.fillna("", inplace=True)
|
| 1303 |
|
| 1304 |
+
DATA_OVERALL_ZH = DATA_OVERALL_ZH[["Rank", "Model", "Model Size (Million Parameters)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_ZH)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_ZH)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_ZH)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_ZH)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING_ZH)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_ZH)} datasets)", f"STS Average ({len(TASK_LIST_STS_ZH)} datasets)"]]
|
| 1305 |
DATA_OVERALL_ZH = DATA_OVERALL_ZH[DATA_OVERALL_ZH.iloc[:, 5:].ne("").any(axis=1)]
|
| 1306 |
|
| 1307 |
return DATA_OVERALL_ZH
|
|
|
|
| 1363 |
# Fill NaN after averaging
|
| 1364 |
DATA_OVERALL_FR.fillna("", inplace=True)
|
| 1365 |
|
| 1366 |
+
DATA_OVERALL_FR = DATA_OVERALL_FR[["Rank", "Model", "Model Size (Million Parameters)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_FR)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_FR)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_FR)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_FR)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING_FR)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_FR)} datasets)", f"STS Average ({len(TASK_LIST_STS_FR)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION_FR)} dataset)"]]
|
| 1367 |
DATA_OVERALL_FR = DATA_OVERALL_FR[DATA_OVERALL_FR.iloc[:, 5:].ne("").any(axis=1)]
|
| 1368 |
|
| 1369 |
return DATA_OVERALL_FR
|
|
|
|
| 1417 |
# Fill NaN after averaging
|
| 1418 |
DATA_OVERALL_PL.fillna("", inplace=True)
|
| 1419 |
|
| 1420 |
+
DATA_OVERALL_PL = DATA_OVERALL_PL[["Rank", "Model", "Model Size (Million Parameters)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_PL)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_PL)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_PL)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_PL)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_PL)} datasets)", f"STS Average ({len(TASK_LIST_STS_PL)} datasets)"]]
|
| 1421 |
DATA_OVERALL_PL = DATA_OVERALL_PL[DATA_OVERALL_PL.iloc[:, 5:].ne("").any(axis=1)]
|
| 1422 |
|
| 1423 |
return DATA_OVERALL_PL
|
utils/__init__.py
ADDED
|
File without changes
|
utils/model_size.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import re
|
| 3 |
+
from huggingface_hub.hf_api import ModelInfo, get_safetensors_metadata, model_info as get_model_info, get_hf_file_metadata, hf_hub_url
|
| 4 |
+
from huggingface_hub import hf_hub_download
|
| 5 |
+
|
| 6 |
+
# Map model IDs to the number of bytes used for one parameter. So, 4 bytes for fp32, 2 bytes for fp16, etc.
|
| 7 |
+
# By default, we assume that the model is stored in fp32.
|
| 8 |
+
KNOWN_BYTES_PER_PARAM = {}
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def get_model_size(model_info: "ModelInfo"):
    """Get the size of the model in millions of parameters.

    Three sources are tried in order:

    1. The repository's safetensors metadata: the per-dtype parameter counts
       are summed.
    2. The byte size of a single-file ``pytorch_model.bin`` checkpoint,
       divided by the number of bytes per parameter.
    3. The ``metadata.total_size`` entry of ``pytorch_model.bin.index.json``
       (sharded checkpoint), divided by the number of bytes per parameter.

    The bytes-per-parameter value is looked up in ``KNOWN_BYTES_PER_PARAM`` by
    model ID and defaults to 4 (i.e. the weights are assumed to be fp32).

    Args:
        model_info: Hub model description; only ``id`` and ``siblings``
            (each exposing ``rfilename``) are read here.

    Returns:
        The parameter count in millions, rounded to an integer, or ``None``
        when none of the sources above is available.
    """
    try:
        safetensors = get_safetensors_metadata(model_info.id)
        return round(sum(safetensors.parameter_count.values()) / 1e6)
    except Exception:
        # Deliberate best-effort: any failure to obtain safetensors metadata
        # simply means we fall back to estimating from the checkpoint size.
        pass

    filenames = {sib.rfilename for sib in model_info.siblings}

    if "pytorch_model.bin" in filenames:
        # Only the file metadata is requested; the checkpoint is not downloaded.
        url = hf_hub_url(model_info.id, filename="pytorch_model.bin")
        meta = get_hf_file_metadata(url)
        bytes_per_param = KNOWN_BYTES_PER_PARAM.get(model_info.id, 4)
        return round(meta.size / bytes_per_param / 1e6)

    if "pytorch_model.bin.index.json" in filenames:
        index_path = hf_hub_download(model_info.id, filename="pytorch_model.bin.index.json")
        # The index file is expected to look like:
        # {
        #   "metadata": {
        #     "total_size": 28272820224
        #   },....
        # Use a context manager so the file handle is always closed.
        with open(index_path, encoding="utf-8") as index_file:
            index = json.load(index_file)
        bytes_per_param = KNOWN_BYTES_PER_PARAM.get(model_info.id, 4)
        if ("metadata" in index) and ("total_size" in index["metadata"]):
            return round(index["metadata"]["total_size"] / bytes_per_param / 1e6)

    return None
|