Spaces:

patrickvonplaten
/

hf_stats

Runtime error

App Files Files Community

patrickvonplaten committed on Feb 2, 2022

Commit

c562611

1 Parent(s): f04fb0d

up

Browse files

Files changed (2) hide show

app.py +101 -0
requirements.txt +2 -0

app.py ADDED Viewed

	@@ -0,0 +1,101 @@

+#!/usr/bin/env python3
+from huggingface_hub import HfApi, hf_hub_download
+from huggingface_hub.repocard import metadata_load
+from collections import Counter, ChainMap
+import multiprocessing
+import pandas as pd
+import streamlit as st
+# Raw "language" metadata values, appended to by retrieve_data().
+ALL_LANGUAGES = []
+# Raw "license" metadata values, appended to by retrieve_data().
+ALL_LICENSES = []
+def get_model_ids_and_tags():
+    api = HfApi()
+    models = api.list_models(full=True)
+    model_ids = [x.modelId for x in models]
+    tags = [x.tags for x in models]
+    return model_ids, tags
+# Listed once at import time: get_metadatas() indexes into MODEL_IDS and
+# main() counts the entries of TAGS.
+MODEL_IDS, TAGS = get_model_ids_and_tags()
+def get_metadatas(i):
+    metadatas = {}
+    model_id = MODEL_IDS[i]
+    try:
+        readme_path = hf_hub_download(model_id, filename="README.md")
+        metadatas[model_id] = metadata_load(readme_path)
+    except:
+        print(model_id + " has no README.md")
+    return metadatas
+def retrieve_data(metadatas):
+    for metadata in metadatas.values():
+        if metadata is None:
+            continue
+        if "language" in metadata:
+            ALL_LANGUAGES.append(metadata["language"])
+        if "license" in metadata:
+            ALL_LICENSES.append(metadata["license"])
+@st.cache(persist=True)
+def main():
+    # 0. Get model ids
+    model_ids, tags = get_model_ids_and_tags()
+    # 1. Retrieve metadatas
+    pool = multiprocessing.Pool()
+    metadatas = dict(ChainMap(*pool.map(get_metadatas, range(len(MODEL_IDS)))))
+    pool.close()
+    # 2. Parse to results
+    retrieve_data(metadatas)
+    def clean_lists(list_like):
+        clean_list = []
+        for item in list_like:
+            if isinstance(item, str):
+                clean_list.append(item)
+            elif isinstance(item, list) and all(isinstance(x, str) for x in item):
+                clean_list = clean_list + item
+        return clean_list
+    # 3. count data
+    lang_counter = Counter(clean_lists(ALL_LANGUAGES))
+    license_counter = Counter(clean_lists(ALL_LICENSES))
+    # 4. count tags
+    tags_counter = Counter(sum(TAGS, []))
+    # 5. change to frame
+    lang_data_frame = pd.DataFrame.from_dict(lang_counter, orient="index")
+    lang_data_frame = lang_data_frame.sort_index().transpose()
+    license_data_frame = pd.DataFrame.from_dict(license_counter, orient="index")
+    license_data_frame = license_data_frame.sort_index().transpose()
+    tags_data_frame = pd.DataFrame.from_dict(tags_counter, orient="index")
+    tags_data_frame = tags_data_frame.sort_index().transpose()
+    return lang_data_frame, license_data_frame, tags_data_frame
+lang_data_frame, license_data_frame, tags_data_frame = main()
+st.title("All Languages")
+st.dataframe(lang_data_frame, width=600, height=1200)
+st.write("Total num of langauges", lang_data_frame.shape[-1])
+st.title("All Licenses")
+st.dataframe(license_data_frame, width=600, height=1200)
+st.write("Total num of licenses", license_data_frame.shape[-1])
+st.title("All Tags")
+st.dataframe(tags_data_frame, width=600, height=1200)
+st.write("Total num of different tags", tags_data_frame.shape[-1])

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ huggingface_hub
2	+ pandas