Update app.py
a bunch of changes that will probably crash
app.py CHANGED
@@ -10,48 +10,99 @@ import matplotlib.pyplot as plt
 
 st.title("Metric Compare")
 
-st.markdown("
+st.markdown("### Choose the dataset you want to use for the comparison:")
 
 api = HfApi()
 datasets = [d.id for d in api.list_datasets(filter="task_categories:text-classification", sort = "downloads", direction=-1, limit = 20)]
 
 dset = st.selectbox('Choose a dataset from the Hub', options=datasets)
 
-st.
-
-filt = ModelFilter(trained_dataset=dset)
-models = [m.modelId for m in api.list_models(filter=filt, sort = "downloads", direction=-1, limit = 20)]
-
-model = st.multiselect(
-    'Choose the models that have been trained/finetuned on this dataset',
-    options=models)
-
-"""
-tokenizer1 = AutoTokenizer.from_pretrained("lvwerra/distilbert-imdb")
-
-model1 = AutoModelForSequenceClassification.from_pretrained("lvwerra/distilbert-imdb")
-
-tokenizer2 = AutoTokenizer.from_pretrained("sahn/distilbert-base-uncased-finetuned-imdb")
-
-model3 = AutoModelForSequenceClassification.from_pretrained("aychang/roberta-base-imdb")
-
-tokenizer4 = AutoTokenizer.from_pretrained("Sreevishnu/funnel-transformer-small-imdb")
-model4 = AutoModelForSequenceClassification.from_pretrained("Sreevishnu/funnel-transformer-small-imdb")
-
-f1 = evaluate.load('f1')
-
+dset_split = st.selectbox('Choose a dataset split for evaluation', options=dset.keys())
+
+st.markdown("### Now select up to 5 models to compare their performance:")
+filt = ModelFilter(trained_dataset=dset)
+all_models = [m.modelId for m in api.list_models(filter=filt, sort = "downloads", direction=-1, limit = 20) if 't5' not in model.tags]
+
+models = st.multiselect(
+    'Choose the models that have been trained/finetuned on this dataset',
+    options=all_models)
+
+button = st.button("Print Models",disabled=False)
+
+if button :
+    if len(location) < 6:
+        st.write(models)
+    else:
+        st.warning("Please select at most 5 models")
+
+st.markdown("### What two metrics do you want to compare?")
+
+metrics = st.multiselect(
+    'Choose the metrics for the comparison',
+    options=['f1', 'accuracy', 'precision', 'recall'])
+
+button2 = st.button("Print Metrics",disabled=False)
+
+if button2 :
+    if len(metrics ) < 3:
+        st.write(metrics)
+    else:
+        st.warning("Please select at most 2 metrics")
+
+st.markdown("### Now wait for the dataset and models to load (this can take some time if they are big!")
+
+### Loading data
+try:
+    data = datasets.load_dataset(dset, split=dset_split)
+    st.text("Loaded the validation split of dataset "+ str(dset))
+except:
+    data = datasets.load_dataset(dset, split="test")
+    st.text("Loaded the test split of dataset "+ str(dset))
+
+    st.text("Sorry, I can't load this dataset... try another one!")
+
+### Loading models
+
+for i in range (len(models)):
+    try:
+        globals()[f"tokenizer_{i}"] = AutoTokenizer.from_pretrained(models[i])
+        globals()[f"model_{i}"] = AutoModelForSequenceClassification.from_pretrained(models[i])
+        st.text("Loaded model "+ str(models[i]))
+    except:
+        st.text("Sorry, I can't load model "+ str(models[i]))
+
+### Defining metrics
+for i in range (len(metrics)):
+    try:
+        globals()[f"metrics[i]"] = evaluate.load(metrics[i])
+    except:
+        st.text("Sorry, I can't load metric "+ str(metrics[i]) +"... Try another one!")
+
+
+### Defining Evaluator
+eval = evaluator("text-classification")
+
+### Defining pipelines
+
+st.markdown("### Help us pick the right labels for your models")
+
+st.text("The labels for your dataset are: "+ str(data.features['label'].names))
+
+"""
+for i in range (len(model_list)):
+    st.text("The labels for your dataset are: "+ str(data.features['label'].names))
+    print(model_list[i])
+    print(AutoConfig.from_pretrained(models[0]).id2label)
+
+for i in range (len(models)):
+    try:
+        globals()[f"pipe1_{i}"] = AutoTokenizer.from_pretrained(models[i])
+        globals()[f"model_{i}"] = AutoModelForSequenceClassification.from_pretrained(models[i])
+        st.text("Loaded model "+ str(models[i]))
+    except:
+        st.text("Sorry, I can't load model "+ str(models[i]))
 
 pipe1 = pipeline("text-classification", model=model1, tokenizer= tokenizer1, device=0)
 res_accuracy1 = eval.compute(model_or_pipeline=pipe1, data=data, metric=accuracy,
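
A few notes on the parts of this change that are likely to crash, each with a hedged sketch rather than a fix to the commit itself. First, the split picker: dset is the dataset id string returned by st.selectbox, so dset.keys() raises AttributeError. One way to offer real split names, assuming the datasets library is available in the Space (none of the code below is part of the commit):

# Sketch only, not part of the commit: query the Hub for the chosen dataset's
# splits and load the one the user picks. Assumes `st` (streamlit) and `dset`
# from the app above.
from datasets import get_dataset_split_names, load_dataset

splits = get_dataset_split_names(dset)  # e.g. ['train', 'test', 'unsupervised'] for imdb
dset_split = st.selectbox('Choose a dataset split for evaluation', options=splits)
data = load_dataset(dset, split=dset_split)
st.text("Loaded the " + dset_split + " split of dataset " + str(dset))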
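
In the model-listing comprehension, the filter clause reads model.tags while the loop variable is m, and the "Print Models" check tests len(location), which is not defined anywhere in this hunk. A sketch of that selection block with consistent names, assuming the intent is a cap of five models:

# Sketch only: filter on the loop variable `m` and cap the selection at 5.
filt = ModelFilter(trained_dataset=dset)
all_models = [m.modelId
              for m in api.list_models(filter=filt, sort="downloads", direction=-1, limit=20)
              if "t5" not in m.tags]

models = st.multiselect(
    'Choose the models that have been trained/finetuned on this dataset',
    options=all_models)

if st.button("Print Models"):
    if len(models) < 6:
        st.write(models)
    else:
        st.warning("Please select at most 5 models")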
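
The loading loops stash objects through globals() with f-strings, and the metric key f"metrics[i]" has no braces, so every loaded metric overwrites the same literal name. A sketch of the same loops using plain dicts, assuming the transformers and evaluate imports already present at the top of app.py:

# Sketch only: keep loaded objects in dicts keyed by repo/metric name
# instead of writing into globals().
tokenizers, loaded_models, loaded_metrics = {}, {}, {}

for name in models:
    try:
        tokenizers[name] = AutoTokenizer.from_pretrained(name)
        loaded_models[name] = AutoModelForSequenceClassification.from_pretrained(name)
        st.text("Loaded model " + name)
    except Exception:
        st.text("Sorry, I can't load model " + name)

for metric_name in metrics:
    try:
        loaded_metrics[metric_name] = evaluate.load(metric_name)
    except Exception:
        st.text("Sorry, I can't load metric " + metric_name + "... Try another one!")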
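
Finally, the hunk ends mid-call at res_accuracy1 = eval.compute(...), and eval shadows the Python builtin. For reference, a sketch of how the evaluate.evaluator API can score every selected model on every selected metric; the results dict, the dicts from the previous sketch, and the device choice are assumptions, not code from this commit:

# Sketch only: run each selected model against each selected metric.
# `pipeline`, `data`, `models`, `metrics` come from the app above; a
# label_mapping argument may be needed if a model's output labels
# (e.g. "LABEL_0") do not match the dataset's integer labels.
from evaluate import evaluator

task_evaluator = evaluator("text-classification")  # avoids shadowing the builtin eval
results = {}
for name in models:
    pipe = pipeline("text-classification",
                    model=loaded_models[name],
                    tokenizer=tokenizers[name],
                    device=0)  # assumes the Space has a GPU
    results[name] = {
        m: task_evaluator.compute(model_or_pipeline=pipe, data=data, metric=loaded_metrics[m])[m]
        for m in metrics
    }
st.write(results)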