Spaces:

LeMaterial
/

LeMat-GenBench

Running

App Files Files Community

cgeorgiaw HF Staff committed on Feb 12

Commit

2484fe9

1 Parent(s): bc5d507

adding filter for training set

Browse files

Files changed (2) hide show

about.py +3 -0
app.py +54 -8

about.py CHANGED Viewed

@@ -61,6 +61,8 @@ COLUMN_DISPLAY_NAMES = {
     'hhi_production_mean': 'HHI Production',
     'hhi_reserve_mean': 'HHI Reserve',
     'hhi_combined_mean': 'HHI Combined',
 }
 # Metrics that can be shown as percentages (count-based metrics)
@@ -152,6 +154,7 @@ COLUMN_TO_GROUP = get_column_to_group_mapping()
 # Compact view columns (most important metrics visible without scrolling)
 COMPACT_VIEW_COLUMNS = [
     'model_name',
     'overall_valid_count',
     'unique_count',
     'novel_count',

     'hhi_production_mean': 'HHI Production',
     'hhi_reserve_mean': 'HHI Reserve',
     'hhi_combined_mean': 'HHI Combined',
+    # Metadata columns
+    'training_set': 'Training Set',
 }
 # Metrics that can be shown as percentages (count-based metrics)
 # Compact view columns (most important metrics visible without scrolling)
 COMPACT_VIEW_COLUMNS = [
     'model_name',
+    'training_set',
     'overall_valid_count',
     'unique_count',
     'novel_count',

app.py CHANGED Viewed

@@ -45,6 +45,8 @@ def format_dataframe(df, show_percentage=False, selected_groups=None, compact_vi
         selected_cols = [col for col in COMPACT_VIEW_COLUMNS if col in df.columns]
     else:
         # Build from selected groups
         if 'n_structures' in df.columns:
             selected_cols.append('n_structures')
@@ -73,6 +75,12 @@ def format_dataframe(df, show_percentage=False, selected_groups=None, compact_vi
             name = row['model_name']
             symbols = []
             # Add relaxed symbol
             if 'relaxed' in df.columns and row.get('relaxed', False):
                 symbols.append('⚡')
@@ -109,16 +117,28 @@ def format_dataframe(df, show_percentage=False, selected_groups=None, compact_vi
         if display_df[col].dtype in ['float64', 'float32']:
             display_df[col] = display_df[col].round(4)
     # Rename columns for display
     display_df = display_df.rename(columns=COLUMN_DISPLAY_NAMES)
     # Apply color coding based on metric groups
-    styled_df = apply_color_styling(display_df, selected_cols)
     return styled_df
-def apply_color_styling(display_df, original_cols):
     """Apply background colors to dataframe based on metric groups using pandas Styler."""
     def style_by_group(x):
         # Create a DataFrame with the same shape filled with empty strings
@@ -136,12 +156,20 @@ def apply_color_styling(display_df, original_cols):
                     if color:
                         styles[display_col] = f'background-color: {color}'
         return styles
     # Apply the styling function
     return display_df.style.apply(style_by_group, axis=None)
-def update_leaderboard(show_percentage, selected_groups, compact_view, cached_df, sort_by, sort_direction):
     """Update the leaderboard based on user selections.
     Uses cached dataframe to avoid re-downloading data on every change.
@@ -149,6 +177,10 @@ def update_leaderboard(show_percentage, selected_groups, compact_view, cached_df
     # Use cached dataframe instead of re-downloading
     df_to_format = cached_df.copy()
     # Convert display name back to raw column name for sorting
     if sort_by and sort_by != "None":
         # Create reverse mapping from display names to raw column names
@@ -321,6 +353,12 @@ Generative machine learning models hold great promise for accelerating materials
                             value="Descending",
                             label="Sort Direction"
                         )
                     with gr.Column(scale=2):
                         selected_groups = gr.CheckboxGroup(
                             choices=list(METRIC_GROUPS.keys()),
@@ -353,27 +391,32 @@ Generative machine learning models hold great promise for accelerating materials
                     # Update dataframe when options change (using cached data)
                     show_percentage.change(
                         fn=update_leaderboard,
-                        inputs=[show_percentage, selected_groups, compact_view, cached_df_state, sort_by, sort_direction],
                         outputs=leaderboard_table
                     )
                     selected_groups.change(
                         fn=update_leaderboard,
-                        inputs=[show_percentage, selected_groups, compact_view, cached_df_state, sort_by, sort_direction],
                         outputs=leaderboard_table
                     )
                     compact_view.change(
                         fn=update_leaderboard,
-                        inputs=[show_percentage, selected_groups, compact_view, cached_df_state, sort_by, sort_direction],
                         outputs=leaderboard_table
                     )
                     sort_by.change(
                         fn=update_leaderboard,
-                        inputs=[show_percentage, selected_groups, compact_view, cached_df_state, sort_by, sort_direction],
                         outputs=leaderboard_table
                     )
                     sort_direction.change(
                         fn=update_leaderboard,
-                        inputs=[show_percentage, selected_groups, compact_view, cached_df_state, sort_by, sort_direction],
                         outputs=leaderboard_table
                     )
@@ -382,12 +425,15 @@ Generative machine learning models hold great promise for accelerating materials
                 gr.Markdown("""
 **Symbol Legend:**
 - ✅ Model output verified
 - ⚡ Structures were already relaxed
 - ★ Contributes to LeMat-Bulk reference dataset (in-distribution)
 - ◆ Out-of-distribution relative to LeMat-Bulk reference dataset
 Verified submissions mean the results came from a model submission rather than a CIF submission.
 """)
             with gr.TabItem("✉️ Submit", elem_id="boundary-benchmark-tab-table"):

         selected_cols = [col for col in COMPACT_VIEW_COLUMNS if col in df.columns]
     else:
         # Build from selected groups
+        if 'training_set' in df.columns:
+            selected_cols.append('training_set')
         if 'n_structures' in df.columns:
             selected_cols.append('n_structures')
             name = row['model_name']
             symbols = []
+            # Add paper link emoji
+            if 'paper_link' in df.columns:
+                paper_val = row.get('paper_link', None)
+                if paper_val and isinstance(paper_val, str) and paper_val.strip():
+                    symbols.append(f'<a href="{paper_val.strip()}" target="_blank">📄</a>')
             # Add relaxed symbol
             if 'relaxed' in df.columns and row.get('relaxed', False):
                 symbols.append('⚡')
         if display_df[col].dtype in ['float64', 'float32']:
             display_df[col] = display_df[col].round(4)
+    # Separate baseline models to the bottom
+    baseline_indices = set()
+    if 'notes' in df.columns:
+        is_baseline = df['notes'].fillna('').str.contains('baseline', case=False, na=False)
+        non_baseline_df = display_df[~is_baseline]
+        baseline_df = display_df[is_baseline]
+        display_df = pd.concat([non_baseline_df, baseline_df]).reset_index(drop=True)
+        # Track baseline row indices in the new dataframe
+        baseline_indices = set(range(len(non_baseline_df), len(display_df)))
     # Rename columns for display
     display_df = display_df.rename(columns=COLUMN_DISPLAY_NAMES)
     # Apply color coding based on metric groups
+    styled_df = apply_color_styling(display_df, selected_cols, baseline_indices)
     return styled_df
+def apply_color_styling(display_df, original_cols, baseline_indices=None):
     """Apply background colors to dataframe based on metric groups using pandas Styler."""
+    if baseline_indices is None:
+        baseline_indices = set()
     def style_by_group(x):
         # Create a DataFrame with the same shape filled with empty strings
                     if color:
                         styles[display_col] = f'background-color: {color}'
+        # Add thick top border to the first baseline row as a separator
+        if baseline_indices:
+            first_baseline_idx = min(baseline_indices)
+            for col in x.columns:
+                current = styles.at[first_baseline_idx, col]
+                separator_style = 'border-top: 3px solid #555'
+                styles.at[first_baseline_idx, col] = f'{current}; {separator_style}' if current else separator_style
         return styles
     # Apply the styling function
     return display_df.style.apply(style_by_group, axis=None)
+def update_leaderboard(show_percentage, selected_groups, compact_view, cached_df, sort_by, sort_direction, training_set_filter):
     """Update the leaderboard based on user selections.
     Uses cached dataframe to avoid re-downloading data on every change.
     # Use cached dataframe instead of re-downloading
     df_to_format = cached_df.copy()
+    # Apply training set filter
+    if training_set_filter and training_set_filter != "All" and 'training_set' in df_to_format.columns:
+        df_to_format = df_to_format[df_to_format['training_set'] == training_set_filter]
     # Convert display name back to raw column name for sorting
     if sort_by and sort_by != "None":
         # Create reverse mapping from display names to raw column names
                             value="Descending",
                             label="Sort Direction"
                         )
+                        training_set_filter = gr.Dropdown(
+                            choices=["All"] + TRAINING_DATASETS,
+                            value="All",
+                            label="Filter by Training Set",
+                            info="Show only models trained on a specific dataset"
+                        )
                     with gr.Column(scale=2):
                         selected_groups = gr.CheckboxGroup(
                             choices=list(METRIC_GROUPS.keys()),
                     # Update dataframe when options change (using cached data)
                     show_percentage.change(
                         fn=update_leaderboard,
+                        inputs=[show_percentage, selected_groups, compact_view, cached_df_state, sort_by, sort_direction, training_set_filter],
                         outputs=leaderboard_table
                     )
                     selected_groups.change(
                         fn=update_leaderboard,
+                        inputs=[show_percentage, selected_groups, compact_view, cached_df_state, sort_by, sort_direction, training_set_filter],
                         outputs=leaderboard_table
                     )
                     compact_view.change(
                         fn=update_leaderboard,
+                        inputs=[show_percentage, selected_groups, compact_view, cached_df_state, sort_by, sort_direction, training_set_filter],
                         outputs=leaderboard_table
                     )
                     sort_by.change(
                         fn=update_leaderboard,
+                        inputs=[show_percentage, selected_groups, compact_view, cached_df_state, sort_by, sort_direction, training_set_filter],
                         outputs=leaderboard_table
                     )
                     sort_direction.change(
                         fn=update_leaderboard,
+                        inputs=[show_percentage, selected_groups, compact_view, cached_df_state, sort_by, sort_direction, training_set_filter],
+                        outputs=leaderboard_table
+                    )
+                    training_set_filter.change(
+                        fn=update_leaderboard,
+                        inputs=[show_percentage, selected_groups, compact_view, cached_df_state, sort_by, sort_direction, training_set_filter],
                         outputs=leaderboard_table
                     )
                 gr.Markdown("""
 **Symbol Legend:**
+- 📄 Paper available (click to view)
 - ✅ Model output verified
 - ⚡ Structures were already relaxed
 - ★ Contributes to LeMat-Bulk reference dataset (in-distribution)
 - ◆ Out-of-distribution relative to LeMat-Bulk reference dataset
 Verified submissions mean the results came from a model submission rather than a CIF submission.
+Models marked as baselines appear below the separator line at the bottom of the table.
 """)
             with gr.TabItem("✉️ Submit", elem_id="boundary-benchmark-tab-table"):