Spaces:
Sleeping
Sleeping
Daniel van Strien
committed on
Commit
·
39cd921
1
Parent(s):
8dd872c
add autogenerated tab
Browse files
app.py
CHANGED
|
@@ -14,7 +14,7 @@ pd.options.plotting.backend = "plotly"
|
|
| 14 |
|
| 15 |
|
| 16 |
def download_dataset():
|
| 17 |
-
return load_dataset("open-source-metrics/model-repos-stats", split="train")
|
| 18 |
|
| 19 |
|
| 20 |
def _clean_tags(tags: Optional[Union[str, List[str]]]):
|
|
@@ -30,6 +30,16 @@ def _clean_tags(tags: Optional[Union[str, List[str]]]):
|
|
| 30 |
return []
|
| 31 |
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
def prep_dataset():
|
| 34 |
ds = download_dataset()
|
| 35 |
df = ds.to_pandas()
|
|
@@ -39,8 +49,10 @@ def prep_dataset():
|
|
| 39 |
df["has_languages"] = df.languages.apply(len) > 0
|
| 40 |
df["has_tags"] = df.tags.apply(len) > 0
|
| 41 |
df["has_dataset"] = df.datasets.apply(len) > 0
|
| 42 |
-
df["has_co2"] = df.co2.
|
| 43 |
df["has_co2"] = df.co2.apply(lambda x: x is not None)
|
|
|
|
|
|
|
| 44 |
df = df.drop(columns=["Unnamed: 0"])
|
| 45 |
df.to_parquet("data.parquet")
|
| 46 |
return df
|
|
@@ -148,6 +160,14 @@ def metadata_coverage_by_library(metadata_field):
|
|
| 148 |
return df.groupby("library")[metadata_field].mean().sort_values().plot.barh()
|
| 149 |
|
| 150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
df = load_data()
|
| 152 |
top_n = df.library.value_counts().shape[0]
|
| 153 |
libraries = [library for library in df.library.unique() if library]
|
|
@@ -200,6 +220,12 @@ with gr.Blocks() as demo:
|
|
| 200 |
metadata_field.change(
|
| 201 |
metadata_coverage_by_library, [metadata_field], plot, queue=False
|
| 202 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
|
| 204 |
with gr.Tab("Model Cards"):
|
| 205 |
gr.Markdown(
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
def download_dataset():
    """Load the ``train`` split of ``open-source-metrics/model-repos-stats``.

    Returns:
        The object returned by ``load_dataset`` for the requested split.
    """
    # NOTE(review): ``ignore_verifications`` is forwarded unchanged to
    # ``load_dataset``. Newer ``datasets`` releases appear to replace it with
    # ``verification_mode`` -- confirm against the pinned library version.
    return load_dataset(
        "open-source-metrics/model-repos-stats",
        split="train",
        ignore_verifications=True,
    )
|
| 18 |
|
| 19 |
|
| 20 |
def _clean_tags(tags: Optional[Union[str, List[str]]]):
|
|
|
|
| 30 |
return []
|
| 31 |
|
| 32 |
|
| 33 |
+
def _is_generated_from_tag(tags):
|
| 34 |
+
return any("generated" in tag for tag in tags)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _parse_tags_for_generated(tags):
|
| 38 |
+
for tag in tags:
|
| 39 |
+
if "generated" in tag:
|
| 40 |
+
return tag
|
| 41 |
+
|
| 42 |
+
|
| 43 |
def prep_dataset():
|
| 44 |
ds = download_dataset()
|
| 45 |
df = ds.to_pandas()
|
|
|
|
| 49 |
df["has_languages"] = df.languages.apply(len) > 0
|
| 50 |
df["has_tags"] = df.tags.apply(len) > 0
|
| 51 |
df["has_dataset"] = df.datasets.apply(len) > 0
|
| 52 |
+
df["has_co2"] = df.co2.notnull()
|
| 53 |
df["has_co2"] = df.co2.apply(lambda x: x is not None)
|
| 54 |
+
df['has_license'] = df.license.notnull()
|
| 55 |
+
df['is_generated'] = df.tags.apply(_is_generated_from_tag)
|
| 56 |
df = df.drop(columns=["Unnamed: 0"])
|
| 57 |
df.to_parquet("data.parquet")
|
| 58 |
return df
|
|
|
|
| 160 |
return df.groupby("library")[metadata_field].mean().sort_values().plot.barh()
|
| 161 |
|
| 162 |
|
| 163 |
+
def metadata_coverage_by_autogenerated(metadata_field):
    """Plot the mean of ``metadata_field`` per "generated" tag.

    Only rows whose ``is_generated`` column is truthy are considered. Each of
    those rows is labelled with the first of its tags containing
    ``"generated"``, and the mean of ``metadata_field`` is taken per label.

    Args:
        metadata_field: Name of the column to average within each group.

    Returns:
        The result of ``.plot.barh()`` on the sorted per-group means
        (presumably a figure from the configured pandas plotting backend --
        confirm against the module setup).
    """
    df = load_data()
    # ``reset_index`` returns a new frame; the previous bare call discarded
    # its result and therefore did nothing. Chain it so the subset really
    # gets a fresh 0..n-1 index, dropping the old one.
    subset_df = df[df["is_generated"]].copy(deep=True).reset_index(drop=True)
    subset_df["autogenerated-from"] = subset_df.tags.apply(
        _parse_tags_for_generated
    )
    coverage = subset_df.groupby("autogenerated-from")[metadata_field].mean()
    return coverage.sort_values().plot.barh()
|
| 169 |
+
|
| 170 |
+
|
| 171 |
df = load_data()
|
| 172 |
top_n = df.library.value_counts().shape[0]
|
| 173 |
libraries = [library for library in df.library.unique() if library]
|
|
|
|
| 220 |
metadata_field.change(
|
| 221 |
metadata_coverage_by_library, [metadata_field], plot, queue=False
|
| 222 |
)
|
| 223 |
+
with gr.Tab("Auto generated model cards"):
|
| 224 |
+
metadata_field = gr.Dropdown(choices=metadata_coverage_columns)
|
| 225 |
+
plot = gr.Plot()
|
| 226 |
+
metadata_field.change(
|
| 227 |
+
metadata_coverage_by_autogenerated, [metadata_field], plot, queue=False
|
| 228 |
+
)
|
| 229 |
|
| 230 |
with gr.Tab("Model Cards"):
|
| 231 |
gr.Markdown(
|