Spaces:

OpenHands
/

openhands-index

Running

openhands openhands committed on Jan 26

Commit

71ba49b

1 Parent(s): a4b9436

Fix column name handling in visualizations for release_date and parameter_count

- Handle different column name formats (e.g., 'Release_Date' vs 'release_date')
- Fix score column detection to handle 'Average Score' capitalization
- Fix model column detection to handle 'Language Model' capitalization
- Fix parameter count column detection for size chart

Co-authored-by: openhands <openhands@all-hands.dev>

Files changed (1) hide show

visualizations.py +61 -17

visualizations.py CHANGED Viewed

@@ -13,12 +13,19 @@ def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
     Create a line chart showing model performance evolution over release dates.
     Args:
-        df: DataFrame with columns including 'release_date', 'Language Model', 'average score', 'openness'
     Returns:
         Plotly figure showing score evolution over time
     """
-    if df.empty or 'release_date' not in df.columns:
         fig = go.Figure()
         fig.add_annotation(
             text="No release date data available",
@@ -29,7 +36,7 @@ def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
         return fig
     # Filter out rows without release dates
-    plot_df = df[df['release_date'].notna() & (df['release_date'] != '')].copy()
     if plot_df.empty:
         fig = go.Figure()
@@ -41,15 +48,19 @@ def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
         )
         return fig
-    # Convert release_date to datetime
-    plot_df['release_date'] = pd.to_datetime(plot_df['release_date'], errors='coerce')
     plot_df = plot_df.dropna(subset=['release_date'])
     # Sort by release date
     plot_df = plot_df.sort_values('release_date')
-    # Get the score column
-    score_col = 'average score' if 'average score' in plot_df.columns else None
     if score_col is None:
         for col in plot_df.columns:
             if 'score' in col.lower() and 'average' in col.lower():
@@ -67,7 +78,13 @@ def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
         return fig
     # Get model name column
-    model_col = 'Language Model' if 'Language Model' in plot_df.columns else 'Language model'
     # Map openness to colors
     color_map = {
@@ -177,13 +194,27 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
     Create a scatter plot showing accuracy vs parameter count for open-weights models.
     Args:
-        df: DataFrame with columns including 'parameter_count_b', 'active_parameter_count_b',
             'average score', 'openness', 'Language Model'
     Returns:
         Plotly figure showing accuracy vs model size
     """
-    if df.empty or 'parameter_count_b' not in df.columns:
         fig = go.Figure()
         fig.add_annotation(
             text="No parameter count data available",
@@ -196,9 +227,12 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
     # Filter to only open-weights models with parameter data
     open_aliases = [aliases.CANONICAL_OPENNESS_OPEN] + list(aliases.OPENNESS_ALIASES.get(aliases.CANONICAL_OPENNESS_OPEN, []))
     plot_df = df[
-        (df['parameter_count_b'].notna()) &
-        (df['Openness'].isin(open_aliases) | df.get('openness', pd.Series()).isin(open_aliases))
     ].copy()
     if plot_df.empty:
@@ -211,8 +245,12 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
         )
         return fig
-    # Get the score column
-    score_col = 'average score' if 'average score' in plot_df.columns else None
     if score_col is None:
         for col in plot_df.columns:
             if 'score' in col.lower() and 'average' in col.lower():
@@ -230,15 +268,21 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
         return fig
     # Get model name column
-    model_col = 'Language Model' if 'Language Model' in plot_df.columns else 'Language model'
     fig = go.Figure()
     # Determine if we should use active params (for MoE models) or total params
     # Use active params if available, otherwise total params
     for _, row in plot_df.iterrows():
-        total_params = row['parameter_count_b']
-        active_params = row.get('active_parameter_count_b')
         model_name = row.get(model_col, 'Unknown')
         score = row[score_col]

     Create a line chart showing model performance evolution over release dates.
     Args:
+        df: DataFrame with columns including 'release_date' or 'Release_Date', 'Language Model', 'average score', 'openness'
     Returns:
         Plotly figure showing score evolution over time
     """
+    # Handle different column name formats
+    release_date_col = None
+    for col in ['release_date', 'Release_Date', 'Release Date']:
+        if col in df.columns:
+            release_date_col = col
+            break
+    if df.empty or release_date_col is None:
         fig = go.Figure()
         fig.add_annotation(
             text="No release date data available",
         return fig
     # Filter out rows without release dates
+    plot_df = df[df[release_date_col].notna() & (df[release_date_col] != '')].copy()
     if plot_df.empty:
         fig = go.Figure()
         )
         return fig
+    # Convert release_date to datetime (normalize column name)
+    plot_df['release_date'] = pd.to_datetime(plot_df[release_date_col], errors='coerce')
     plot_df = plot_df.dropna(subset=['release_date'])
     # Sort by release date
     plot_df = plot_df.sort_values('release_date')
+    # Get the score column (handle different naming conventions)
+    score_col = None
+    for col in ['average score', 'Average Score', 'Average score']:
+        if col in plot_df.columns:
+            score_col = col
+            break
     if score_col is None:
         for col in plot_df.columns:
             if 'score' in col.lower() and 'average' in col.lower():
         return fig
     # Get model name column
+    model_col = None
+    for col in ['Language Model', 'Language model', 'llm_base']:
+        if col in plot_df.columns:
+            model_col = col
+            break
+    if model_col is None:
+        model_col = 'Language Model'  # Default
     # Map openness to colors
     color_map = {
     Create a scatter plot showing accuracy vs parameter count for open-weights models.
     Args:
+        df: DataFrame with columns including 'parameter_count_b' or 'Parameter_Count_B',
+            'active_parameter_count_b' or 'Active_Parameter_Count_B',
             'average score', 'openness', 'Language Model'
     Returns:
         Plotly figure showing accuracy vs model size
     """
+    # Handle different column name formats for parameter count
+    param_col = None
+    for col in ['parameter_count_b', 'Parameter_Count_B', 'Parameter Count B']:
+        if col in df.columns:
+            param_col = col
+            break
+    active_param_col = None
+    for col in ['active_parameter_count_b', 'Active_Parameter_Count_B', 'Active Parameter Count B']:
+        if col in df.columns:
+            active_param_col = col
+            break
+    if df.empty or param_col is None:
         fig = go.Figure()
         fig.add_annotation(
             text="No parameter count data available",
     # Filter to only open-weights models with parameter data
     open_aliases = [aliases.CANONICAL_OPENNESS_OPEN] + list(aliases.OPENNESS_ALIASES.get(aliases.CANONICAL_OPENNESS_OPEN, []))
+    # Get openness column
+    openness_col = 'Openness' if 'Openness' in df.columns else 'openness'
     plot_df = df[
+        (df[param_col].notna()) &
+        (df[openness_col].isin(open_aliases))
     ].copy()
     if plot_df.empty:
         )
         return fig
+    # Get the score column (handle different naming conventions)
+    score_col = None
+    for col in ['average score', 'Average Score', 'Average score']:
+        if col in plot_df.columns:
+            score_col = col
+            break
     if score_col is None:
         for col in plot_df.columns:
             if 'score' in col.lower() and 'average' in col.lower():
         return fig
     # Get model name column
+    model_col = None
+    for col in ['Language Model', 'Language model', 'llm_base']:
+        if col in plot_df.columns:
+            model_col = col
+            break
+    if model_col is None:
+        model_col = 'Language Model'  # Default
     fig = go.Figure()
     # Determine if we should use active params (for MoE models) or total params
     # Use active params if available, otherwise total params
     for _, row in plot_df.iterrows():
+        total_params = row[param_col]
+        active_params = row.get(active_param_col) if active_param_col else None
         model_name = row.get(model_col, 'Unknown')
         score = row[score_col]