Spaces:

lmarena-ai
/

preference-proxy-evaluations

Running

App Files Files Community

Evan Frick committed on Oct 22, 2024

Commit

4001fbf

1 Parent(s): 01f0e4f

a

Browse files

Files changed (1) hide show

app.py +27 -10

app.py CHANGED Viewed

@@ -43,13 +43,13 @@ def main():
     # Iterate over each model in the selected benchmark
     for model, metrics in benchmark_data.items():
         model = path_split(path_splitext(model)[0])[-1]
         # Flatten the metrics dictionary if there are nested metrics
         # For example, in "human_preference_v1", there are subcategories like "overall", "hard_prompt", etc.
         # We'll aggregate these or allow the user to select subcategories as needed
         if isinstance(metrics, dict):
-            # Check if metrics contain nested dictionaries
-            nested_keys = list(metrics.keys())
             # If there are nested keys, we can allow the user to select a subcategory
             # For simplicity, let's assume we want to display all nested metrics concatenated
             flattened_metrics = {}
@@ -63,12 +63,14 @@ def main():
                     flattened_metrics[subkey] = submetrics
             records.append({
                 "Model": model,
                 **flattened_metrics
             })
         else:
             # If metrics are not nested, just add them directly
             records.append({
                 "Model": model,
                 "Value": metrics
             })
@@ -79,23 +81,27 @@ def main():
     df = df.loc[:, ~df.apply(contains_list)]
     if "human" not in selected_benchmark:
-        df = df[sorted(df.columns, key=str.lower)]
     # Set 'Model' as the index
-    df.set_index("Model", inplace=True)
         # Create two columns: one for spacing and one for the search bar
-    col1, col2, col3 = st.columns([1, 3, 1])  # Adjust the ratios as needed
     with col1:
-        # **Column Search Functionality**
-        # st.markdown("#### Filter Columns")
         column_search = st.text_input("", placeholder="Search metrics...", key="search")
-    # column_search = st.text_input("Search for metrics (column names):", "")
     if column_search:
         # Filter columns that contain the search term (case-insensitive)
-        filtered_columns = [col for col in df.columns if column_search.lower() in col.lower()]
         if filtered_columns:
             df_display = df[filtered_columns]
         else:
@@ -105,8 +111,19 @@ def main():
         # If no search term, display all columns
         df_display = df
     # Display the DataFrame
-    st.dataframe(df_display.sort_values(df_display.columns[0], ascending=False) if len(df_display) else df_display, use_container_width=True)
     # Optional: Allow user to download the data as CSV
     csv = df_display.to_csv()

     # Iterate over each model in the selected benchmark
     for model, metrics in benchmark_data.items():
+        model_type = "LLM Judge" if model.endswith(".jsonl") else "Reward Model"
         model = path_split(path_splitext(model)[0])[-1]
         # Flatten the metrics dictionary if there are nested metrics
         # For example, in "human_preference_v1", there are subcategories like "overall", "hard_prompt", etc.
         # We'll aggregate these or allow the user to select subcategories as needed
         if isinstance(metrics, dict):
             # If there are nested keys, we can allow the user to select a subcategory
             # For simplicity, let's assume we want to display all nested metrics concatenated
             flattened_metrics = {}
                     flattened_metrics[subkey] = submetrics
             records.append({
                 "Model": model,
+                "Type": model_type,
                 **flattened_metrics
             })
         else:
             # If metrics are not nested, just add them directly
             records.append({
                 "Model": model,
+                "Type": model_type,
                 "Value": metrics
             })
     df = df.loc[:, ~df.apply(contains_list)]
     if "human" not in selected_benchmark:
+        df = df[sorted(df.columns, key=lambda s: s.lower() if s != "Type" else "A")]
     # Set 'Model' as the index
+    df.set_index(["Model"], inplace=True)
         # Create two columns: one for spacing and one for the search bar
+    col1, col2, col3 = st.columns([1, 1, 2])  # Adjust the ratios as needed
     with col1:
         column_search = st.text_input("", placeholder="Search metrics...", key="search")
+    with col2:
+        model_search = st.text_input("", placeholder="Filter Models (separate criteria with ,) ...", key="search2")
+        model_search_crit = model_search.replace(", ", "|").replace(",", "|")
     if column_search:
         # Filter columns that contain the search term (case-insensitive)
+        filtered_columns = ["Type"] + [col for col in df.columns if column_search.lower() in col.lower()]
         if filtered_columns:
             df_display = df[filtered_columns]
         else:
         # If no search term, display all columns
         df_display = df
+    if model_search:
+        df_display = df_display[df_display.index.str.contains(model_search_crit, case=False)]
+        if len(df_display) == 0:
+            st.warning("No models match your filter.")
+            df_display = pd.DataFrame()  # Empty DataFrame
     # Display the DataFrame
+    st.dataframe(df_display.sort_values(df_display.columns[1], ascending=False).style.background_gradient(cmap='summer_r', axis=0)
+ if len(df_display) else df_display, use_container_width=True, height=500)
     # Optional: Allow user to download the data as CSV
     csv = df_display.to_csv()