File size: 2,687 Bytes
197f5ec
 
 
faf13f8
 
197f5ec
8a56d57
 
197f5ec
8a56d57
 
 
 
 
 
 
 
 
197f5ec
faf13f8
 
8a56d57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
faf13f8
 
8a56d57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197f5ec
 
8a56d57
 
197f5ec
 
8a56d57
197f5ec
faf13f8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import requests
import json
import pandas as pd
from src.constants import MODEL_REPOSITORY_URL, MAIN_MODELS
import streamlit as st


def _join_warnings(value):
    """Flatten one raw ``warnings`` cell into a single searchable string.

    Lists/tuples are joined with ", "; a non-empty string is kept as is.
    Everything else (None, NaN, empty containers) becomes the literal "none".
    """
    if isinstance(value, str):
        return value or "none"
    if isinstance(value, (list, tuple)) and value:
        return ", ".join(value)
    # NaN is truthy, so a plain ``if value`` check would try to join a float.
    return "none"


def clean_models_data(df, with_filter=True):
    """Derive display-friendly columns from the raw models table.

    The input frame is left untouched; all derived columns are added to a
    new frame.

    Args:
        df: DataFrame with at least the columns ``provider``, ``name``,
            ``architecture`` (mapping with ``type`` and ``parameters`` keys)
            and ``warnings``. A ``type`` column is dropped when present.
        with_filter: When truthy, keep only rows whose ``name`` is listed in
            ``MAIN_MODELS``.

    Returns:
        A DataFrame restricted to the columns ``provider``,
        ``provider_clean``, ``name``, ``name_clean``, ``architecture_type``,
        ``architecture_parameters``, ``total_parameters``,
        ``active_parameters``, ``warning_arch`` and ``warning_multi_modal``.
    """
    dict_providers = {
        "google": "Google",
        "mistralai": "MistralAI",
        "meta-llama": "Meta",
        "openai": "OpenAI",
        "anthropic": "Anthropic",
        "cohere": "Cohere",
        "microsoft": "Microsoft",
        "mistral-community": "Mistral Community",
        "databricks": "Databricks",
    }

    # drop() returns a new frame, so the caller's DataFrame is never modified;
    # errors="ignore" lets the function run on a frame without a "type" column.
    df = df.drop(columns="type", errors="ignore")

    name_parts = df["name"].str.split("/")

    # "org/model" -> "model". Names without "/" have no second part (NaN) and
    # fall back to the full name.
    df["name_clean"] = name_parts.str[1].fillna(df["name"])
    df["name_clean"] = df["name_clean"].replace({"-": " ", "latest": ""}, regex=True)

    # For Hugging Face Hub models the organisation prefix of the name is the
    # real provider; every other row keeps its declared provider.
    from_hub = df["provider"] == "huggingface_hub"
    df["provider_clean"] = name_parts.str[0].where(from_hub).fillna(df["provider"])
    df["provider_clean"] = df["provider_clean"].replace(dict_providers, regex=True)

    df["architecture_type"] = df["architecture"].apply(lambda arch: arch["type"])
    df["architecture_parameters"] = df["architecture"].apply(
        lambda arch: arch["parameters"]
    )
    # Parameters may be a mapping with "total"/"active" entries; any other
    # value (or a mapping without the key) is passed through unchanged.
    df["total_parameters"] = df["architecture_parameters"].apply(
        lambda params: params["total"]
        if isinstance(params, dict) and "total" in params
        else params
    )
    df["active_parameters"] = df["architecture_parameters"].apply(
        lambda params: params["active"]
        if isinstance(params, dict) and "active" in params
        else params
    )

    # Kept as a local Series: the joined text is only needed to derive the
    # two boolean flags and is not part of the returned columns.
    warnings_text = df["warnings"].apply(_join_warnings)
    df["warning_arch"] = warnings_text.apply(
        lambda text: "model-arch-not-released" in text
    )
    df["warning_multi_modal"] = warnings_text.apply(
        lambda text: "model-arch-multimodal" in text
    )

    if with_filter:
        df = df[df["name"].isin(MAIN_MODELS)]

    return df[
        [
            "provider",
            "provider_clean",
            "name",
            "name_clean",
            "architecture_type",
            "architecture_parameters",
            "total_parameters",
            "active_parameters",
            "warning_arch",
            "warning_multi_modal",
        ]
    ]


@st.cache_data
def load_models(filter_main=True):
    """Download the model repository and return it as a cleaned DataFrame.

    The JSON document at ``MODEL_REPOSITORY_URL`` is fetched, its ``"models"``
    entry is loaded into a DataFrame and passed to ``clean_models_data``.
    The function is wrapped in ``st.cache_data``.

    Args:
        filter_main: Forwarded to ``clean_models_data`` as ``with_filter``.

    Returns:
        The DataFrame produced by ``clean_models_data``.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status.
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    # requests applies no timeout by default, so an unresponsive server would
    # block the app indefinitely.
    resp = requests.get(MODEL_REPOSITORY_URL, timeout=30)
    # Surface HTTP errors directly instead of trying to parse an error page.
    resp.raise_for_status()
    df = pd.DataFrame(resp.json()["models"])

    return clean_models_data(df, filter_main)