Spaces:

nazneen
/

error-analysis

Runtime error

App Files Community

nazneen committed on May 21, 2022

Commit

0154388

1 Parent(s): 23efaf2

adding parquets

Browse files

Files changed (1) hide show

app.py +17 -18

app.py CHANGED Viewed

@@ -224,15 +224,6 @@ if __name__ == "__main__":
         ["distilbert-base-uncased-finetuned-sst-2-english",
             "albert-base-v2-yelp-polarity"],
     )
-    loss_quantile = st.sidebar.slider(
-        "Loss Quantile", min_value=0.5, max_value=1.0,step=0.01,value=0.95
-    )
-    run_kmeans = st.sidebar.radio("Cluster error slice?", ('True', 'False'), index=0)
-    num_clusters = st.sidebar.slider("# clusters", min_value=1, max_value=20, step=1, value=3)
     ### LOAD DATA AND SESSION VARIABLES ###
     data_df = pd.read_parquet('./assets/data/'+dataset+ '_'+ model+'.parquet')
     if model == 'albert-base-v2-yelp-polarity':
@@ -243,13 +234,28 @@ if __name__ == "__main__":
         st.session_state["user_data"] = data_df
     if "selected_slice" not in st.session_state:
         st.session_state["selected_slice"] = None
     data_df['loss'] = data_df['loss'].astype(float)
     losses = data_df['loss']
     high_loss = losses.quantile(loss_quantile)
     data_df['slice'] = 'high-loss'
     data_df['slice'] = data_df['slice'].where(data_df['loss'] > high_loss, 'low-loss')
     if run_kmeans == 'True':
         merged = kmeans(data_df,num_clusters=num_clusters)
     with lcol:
@@ -264,12 +270,5 @@ if __name__ == "__main__":
             st.markdown("* The table displays model error slices on the evaluation dataset, sorted by loss.")
             st.markdown("* Each row is an input example that includes the label, model pred, loss, and error cluster.")
         st.write(dataframe,width=900, height=300)
-    with rcol:
-        with st.spinner(text='loading...'):
-            st.markdown('<h3>Word Distribution in Error Slice</h3>', unsafe_allow_html=True)
-            commontokens = frequent_tokens(merged, tokenizer, loss_quantile=loss_quantile)
-            with st.expander("How to read the table:"):
-                st.markdown("* The table displays the most frequent tokens in error slices, relative to their frequencies in the val set.")
-            st.write(commontokens)
     quant_panel(merged)

         ["distilbert-base-uncased-finetuned-sst-2-english",
             "albert-base-v2-yelp-polarity"],
     )
     ### LOAD DATA AND SESSION VARIABLES ###
     data_df = pd.read_parquet('./assets/data/'+dataset+ '_'+ model+'.parquet')
     if model == 'albert-base-v2-yelp-polarity':
         st.session_state["user_data"] = data_df
     if "selected_slice" not in st.session_state:
         st.session_state["selected_slice"] = None
+    loss_quantile = st.sidebar.slider(
+        "Loss Quantile", min_value=0.5, max_value=1.0,step=0.01,value=0.95
+    )
     data_df['loss'] = data_df['loss'].astype(float)
     losses = data_df['loss']
     high_loss = losses.quantile(loss_quantile)
     data_df['slice'] = 'high-loss'
     data_df['slice'] = data_df['slice'].where(data_df['loss'] > high_loss, 'low-loss')
+    with rcol:
+        with st.spinner(text='loading...'):
+            st.markdown('<h3>Word Distribution in Error Slice</h3>', unsafe_allow_html=True)
+            commontokens = frequent_tokens(data_df, tokenizer, loss_quantile=loss_quantile)
+            with st.expander("How to read the table:"):
+                st.markdown("* The table displays the most frequent tokens in error slices, relative to their frequencies in the val set.")
+            st.write(commontokens)
+    run_kmeans = st.sidebar.radio("Cluster error slice?", ('True', 'False'), index=0)
+    num_clusters = st.sidebar.slider("# clusters", min_value=1, max_value=20, step=1, value=3)
     if run_kmeans == 'True':
         merged = kmeans(data_df,num_clusters=num_clusters)
     with lcol:
             st.markdown("* The table displays model error slices on the evaluation dataset, sorted by loss.")
             st.markdown("* Each row is an input example that includes the label, model pred, loss, and error cluster.")
         st.write(dataframe,width=900, height=300)
     quant_panel(merged)