Spaces:
Runtime error
Runtime error
Commit
·
fef8635
1
Parent(s):
83e6b38
app update
Browse files
app.py
CHANGED
|
@@ -3,47 +3,73 @@ import streamlit as st
|
|
| 3 |
import math
|
| 4 |
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
)
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
-
def setup_inputs():
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
-
def
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
-
def show_paged_selected_model_info(models_df, page_col, n_per_page=10):
    """Render one page of ``models_df`` as expandable readme panels.

    Args:
        models_df: DataFrame of the models to display; the columns
            ``modelId``, ``pipeline_tag`` and ``readme`` are read.
        page_col: Streamlit container (e.g. a column) that hosts the
            "page" number input.
        n_per_page: Number of models shown on each page.
    """
    n_models = len(models_df)
    # Pages are 0-based, so the last valid index is one less than the page
    # count; clamp to 0 so an empty result still yields a valid input range.
    last_page = max(0, math.ceil(n_models / n_per_page) - 1)
    page = page_col.number_input("page", 0, last_page)

    first_row = page * n_per_page
    page_df = models_df.iloc[first_row : first_row + n_per_page]

    # Report the size of the frame that is actually being paged.
    st.write(f"found {n_models} models")
    columns = ["modelId", "pipeline_tag", "readme"]
    for model_name, tag, readme in page_df[columns].itertuples(index=False):
        with st.expander(f"{model_name} ({tag})"):
            st.write(readme)
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
-
models_df = pd.read_csv("models_with_readmes.csv").dropna(subset=["readme"])
|
| 42 |
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
selected_models_df = get_selected_models_df(query, readme_query)
|
| 46 |
|
| 47 |
-
|
| 48 |
|
| 49 |
-
|
|
|
|
| 3 |
import math
|
| 4 |
|
| 5 |
|
| 6 |
+
class ModelFinder:
    """Streamlit app for searching a table of models and paging the matches.

    The DataFrame passed in is filtered on its ``readme``, ``modelId``,
    ``author`` and ``model_name`` columns; ``modelId``, ``pipeline_tag`` and
    ``readme`` are displayed for each match.
    """

    def __init__(self, models_df):
        """Store the data and create the query widgets.

        Args:
            models_df: DataFrame holding one row per model.
        """
        self.models_df = models_df
        self.n_per_page = 10
        self.setup_inputs()

    def setup_page(self):
        """Write the page title and the short usage notes."""
        st.title("Huggingface model explorer")
        # Use the instance's data rather than a module-level global so the
        # class also works when it is imported from another module.
        st.text(f"search {len(self.models_df)} models by name or readme")
        st.text(
            "note that there are many more models but here we only show those with readme"
        )

    def setup_inputs(self):
        """Create the four text queries and reserve a column for paging."""
        col1, col2, col3, col4, col5 = st.columns(5)
        self.query_input = col1.text_input("model name query", value="")
        self.author_query_input = col2.text_input("author query", value="")
        self.id_query_input = col3.text_input("modelId query", value="")
        self.readme_query_input = col4.text_input("readme query", value="")
        # The fifth column is kept so the page selector can be drawn in it
        # later, once the number of matches is known.
        self.page = col5

    @staticmethod
    def _contains(column, text):
        """Return a boolean mask of cells containing ``text``, ignoring case.

        The text is matched literally (not as a regular expression), so
        characters such as ``+`` or ``(`` typed by a user cannot raise.
        Missing cells count as non-matching.
        """
        return column.str.lower().str.contains(text.lower(), regex=False, na=False)

    def get_selected_models_df(self, query, readme_query, id_query, author_query):
        """Return the rows of ``self.models_df`` that match every query.

        Args:
            query: Substring looked up in the ``model_name`` column.
            readme_query: Substring looked up in the ``readme`` column.
            id_query: Substring looked up in the ``modelId`` column.
            author_query: Substring looked up in the ``author`` column.

        Returns:
            DataFrame with the matching rows. An empty query matches every
            non-missing cell of its column.
        """
        df = self.models_df
        mask = (
            self._contains(df["readme"], readme_query)
            & self._contains(df["modelId"], id_query)
            & self._contains(df["author"], author_query)
            & self._contains(df["model_name"], query)
        )
        return df[mask]

    def _last_page_index(self, n_models):
        """Return the largest valid 0-based page index for ``n_models`` rows."""
        # Clamp to 0 so that an empty result still gives a valid input range.
        return max(0, math.ceil(n_models / self.n_per_page) - 1)

    def show_paged_selected_model_info(self, selected_models_df):
        """Show the match count and one page of expandable readme panels.

        Args:
            selected_models_df: DataFrame of matching models; the columns
                ``modelId``, ``pipeline_tag`` and ``readme`` are read.
        """
        n_models = len(selected_models_df)
        page = self.page.number_input("page", 0, self._last_page_index(n_models))

        first_row = page * self.n_per_page
        page_df = selected_models_df.iloc[first_row : first_row + self.n_per_page]

        st.write(f"found {n_models} models")
        columns = ["modelId", "pipeline_tag", "readme"]
        for model_name, tag, readme in page_df[columns].itertuples(index=False):
            with st.expander(f"{model_name} ({tag})"):
                st.write(readme)

    def run(self):
        """Draw the page header, apply the current queries and show results."""
        self.setup_page()
        selected_models_df = self.get_selected_models_df(
            self.query_input,
            self.readme_query_input,
            self.id_query_input,
            self.author_query_input,
        )
        self.show_paged_selected_model_info(selected_models_df)
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
+
def _split_model_id(model_id):
    """Split a model id into ``(author, model_name)`` at its last ``/``."""
    # rpartition yields an empty author when the id contains no slash.
    author, _, model_name = model_id.rpartition("/")
    return author, model_name


def prepare_models_df(path):
    """Load the models table and derive ``author`` and ``model_name`` columns.

    Rows without a ``readme`` or without a ``modelId`` are dropped. Each
    remaining ``modelId`` is split at its last ``/``: the part before it
    becomes ``author`` (empty when there is no slash) and the part after it
    becomes ``model_name``.

    Args:
        path: Location of the parquet file to read.

    Returns:
        DataFrame with the original columns plus ``author`` and
        ``model_name``.
    """
    df = pd.read_parquet(path).dropna(subset=["readme", "modelId"])
    id_parts = [_split_model_id(model_id) for model_id in df["modelId"]]
    # Build the columns from plain lists so an empty table also works.
    df["author"] = [author for author, _ in id_parts]
    df["model_name"] = [model_name for _, model_name in id_parts]
    return df
|
| 68 |
|
|
|
|
| 69 |
|
| 70 |
+
# Script entry: load the models table, then build and run the explorer app.
# The module-level names are kept because other code in this file may read
# them (e.g. `models_df`).
model_path = "models_with_readmes.parquet"
models_df = prepare_models_df(model_path)

app = ModelFinder(models_df)
app.run()
|