Spaces:

sasha
/

evaluation-buddy

Sleeping

App Files Community

Sasha committed on Mar 17, 2022

Commit

20aa046

1 Parent(s): d8eab79

adding little fixes

Browse files

Files changed (1) hide show

app.py +26 -16

app.py CHANGED Viewed

@@ -23,11 +23,25 @@ top_datasets= ['glue', 'super_glue', 'wikitext', 'imdb', 'squad', 'squad_es', \
                 'sick', 'xsum', 'wikiann', 'yelp_polarity', 'hellaswag', 'piqa', \
                 'race', 'winogrande']
-tasks= ['text classification', 'question answering', 'automatic speech recognition', 'natural language inference', \
-        'machine translation', 'sentiment analysis', 'text simplification', 'named entity recognition', \
-        'reading comprehension', 'paraphrase identification', 'natural language understanding']
 metrics= ['matthews_correlation', 'perplexity', 'meteor', 'code_eval', 'super_glue', 'rouge', 'mauve', 'cer', 'accuracy', 'recall', 'bleurt', 'sari', 'precision', 'mean_iou', 'squad', 'mahalanobis', 'chrf', 'mae', 'squad_v2', 'seqeval', 'cuad', 'wiki_split', 'google_bleu', 'competition_math', 'pearsonr', 'xtreme_s', 'comet', 'gleu', 'spearmanr', 'f1', 'frugalscore', 'bertscore', 'indic_glue', 'mse', 'xnli', 'ter', 'coval', 'wer', 'bleu', 'glue', 'sacrebleu']
 with st.sidebar.expander("Datasets", expanded=True):
     dataset_name = st.selectbox(
         f"Choose a dataset to evaluate on:",
@@ -59,25 +73,21 @@ st.markdown("For more information about this dataset, check out [its website](ht
 st.markdown("### Dataset-Specific Metrics")
 if dataset_name in metrics:
     st.markdown("Great news! Your dataset has a dedicated metric for it! You can use it like this: :point_down:")
-    code = ''' from datasets import load_metric
- metric = load_metric('''+dataset_name+''', '''+dataset_config+''')'''
-    st.code(code, language='python')
-    dedicated_metric = True
 else:
     st.markdown("Your dataset doesn't have a dedicated metric, but that's ok! :wink:")
     dedicated_metric = False
 st.markdown("### Task-Specific Metrics")
-task = None
-try:
-    task = dataset_builder.info.task_templates[0].task
-except:
-    for t in tasks:
-        if t in str(dataset_builder.info.description).lower():
-            task = t
-        else:
-            continue
 if task is not None:
     st.markdown("The task associated to it your dataset is: " + task.replace('-',' '))

                 'sick', 'xsum', 'wikiann', 'yelp_polarity', 'hellaswag', 'piqa', \
                 'race', 'winogrande']
+tasks= ['classification', 'question answering', 'automatic speech recognition', 'natural language inference', \
+        'translation', 'sentiment analysis', 'text simplification', 'named entity recognition', \
+        'reading comprehension', 'paraphrase identification', 'natural language understanding',\
+        'textual entailment', 'commonsense reasoning', 'summarization']
 metrics= ['matthews_correlation', 'perplexity', 'meteor', 'code_eval', 'super_glue', 'rouge', 'mauve', 'cer', 'accuracy', 'recall', 'bleurt', 'sari', 'precision', 'mean_iou', 'squad', 'mahalanobis', 'chrf', 'mae', 'squad_v2', 'seqeval', 'cuad', 'wiki_split', 'google_bleu', 'competition_math', 'pearsonr', 'xtreme_s', 'comet', 'gleu', 'spearmanr', 'f1', 'frugalscore', 'bertscore', 'indic_glue', 'mse', 'xnli', 'ter', 'coval', 'wer', 'bleu', 'glue', 'sacrebleu']
+def find_task(dname):
+    task = None
+    dataset_builder = load_dataset_builder(dataset_name, dataset_config)
+    try:
+        task = dataset_builder.info.task_templates[0].task
+    except:
+        for t in tasks:
+            if t in str(dataset_builder.info.description).lower():
+                task = t
+            else:
+                continue
+    return(task)
 with st.sidebar.expander("Datasets", expanded=True):
     dataset_name = st.selectbox(
         f"Choose a dataset to evaluate on:",
 st.markdown("### Dataset-Specific Metrics")
 if dataset_name in metrics:
     st.markdown("Great news! Your dataset has a dedicated metric for it! You can use it like this: :point_down:")
+    if "glue" in dataset_name:
+        code = ''' from datasets import load_metric
+     metric = load_metric(\"'''+dataset_name+'''\", \"'''+dataset_config+'''\")'''
+        st.code(code, language='python')
+    else:
+        code = ''' from datasets import load_metric
+     metric = load_metric(\"'''+dataset_name+'''\")'''
+        st.code(code, language='python')
 else:
     st.markdown("Your dataset doesn't have a dedicated metric, but that's ok! :wink:")
     dedicated_metric = False
 st.markdown("### Task-Specific Metrics")
+task = find_task(dataset_name)
 if task is not None:
     st.markdown("The task associated to it your dataset is: " + task.replace('-',' '))