Spaces:

openadmet
/

pxr-challenge

Running

hmacdope Claude Sonnet 4.6 committed 22 days ago

Commit

4738a52

1 Parent(s): 420eddb

fix: download CSV shows mean and std as separate columns

Replaces the XX±YY collapsed format with plain mean and std columns
(e.g. MAE / MAE std) for easier programmatic use. Removes now-dead
_collapse_mean_std and _fmt_metric helpers.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show

app.py +34 -66

app.py CHANGED Viewed

@@ -47,35 +47,6 @@ def hide_username_for_anonymous_entries(df: pd.DataFrame) -> pd.DataFrame:
     return df.drop(columns=["user_alias", "anonymous"], errors="ignore")
-def _fmt_metric(v: float) -> str:
-    """Format a metric value, returning '—' for NaN."""
-    if pd.isna(v):
-        return "—"
-    return f"{v:.4f}"
-def _collapse_mean_std(df: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
-    """Combine *_mean / *_std column pairs into a single 'XX±YY' string column.
-    Args:
-        df: Leaderboard dataframe with <metric>_mean and <metric>_std columns.
-        metrics: List of metric base names to collapse.
-    Returns:
-        DataFrame with combined columns replacing the original pairs.
-    """
-    df = df.copy()
-    for metric in metrics:
-        mean_col = f"{metric}_mean"
-        std_col = f"{metric}_std"
-        if mean_col in df.columns and std_col in df.columns:
-            df[metric] = (
-                df[mean_col].map(_fmt_metric) + "±" + df[std_col].map(_fmt_metric)
-            )
-            df = df.drop(columns=[mean_col, std_col])
-    return df
 _ACTIVITY_EMPTY = pd.DataFrame(
     columns=[
@@ -111,37 +82,34 @@ def _prepare_activity_df(df: pd.DataFrame, for_download: bool = False) -> pd.Dat
     Args:
         df: Raw leaderboard DataFrame from S3.
-        for_download: If True, collapse mean/std pairs into 'XX±YY' strings for
-            the downloadable CSV. If False (default), keep mean values as plain
-            floats for numeric sorting in the live leaderboard.
     Returns:
         Prepared DataFrame.
     """
     df = df.sort_values("RAE_mean", ascending=True).reset_index(drop=True)
     if for_download:
-        df = _collapse_mean_std(df, ["MAE", "RAE", "R2", "Spearman_R", "Kendall's_Tau"])
-        df = df.rename(
-            columns={
-                "Spearman_R": "Spearman ρ",
-                "Kendall's_Tau": "Kendall's τ",
-                "submitted_at": "Submitted",
-            }
-        )
     else:
-        std_cols = [c for c in df.columns if c.endswith("_std")]
         df = df.drop(columns=std_cols)
-        df = df.rename(
-            columns={
-                "MAE_mean": "MAE",
-                "RAE_mean": "RAE",
-                "R2_mean": "R2",
-                "Spearman_R_mean": "Spearman ρ",
-                "Kendall's_Tau_mean": "Kendall's τ",
-                "submitted_at": "Submitted",
-            }
-        )
         for col in ["MAE", "RAE", "R2", "Spearman ρ", "Kendall's τ"]:
             if col in df.columns:
                 df[col] = df[col].round(4)
@@ -171,24 +139,24 @@ def _prepare_structure_df(df: pd.DataFrame, for_download: bool = False) -> pd.Da
     """
     df = df.sort_values("LDDT-PLI_mean", ascending=False).reset_index(drop=True)
     if for_download:
-        df = _collapse_mean_std(df, ["LDDT-PLI", "BiSyRMSD", "LDDT-LP", "Ligand_RMSD"])
-        df["Coverage"] = (
-            df["coverage_mean"].map(_fmt_metric) if "coverage_mean" in df.columns else "—"
-        )
-        df = df.drop(columns=["coverage_mean"], errors="ignore")
     else:
-        std_cols = [c for c in df.columns if c.endswith("_std")]
         df = df.drop(columns=std_cols)
-        df = df.rename(
-            columns={
-                "LDDT-PLI_mean": "LDDT-PLI",
-                "BiSyRMSD_mean": "BiSyRMSD",
-                "LDDT-LP_mean": "LDDT-LP",
-                "Ligand_RMSD_mean": "Ligand RMSD",
-                "coverage_mean": "Coverage",
-            }
-        )
         for col in ["LDDT-PLI", "BiSyRMSD", "LDDT-LP", "Ligand RMSD", "Coverage"]:
             if col in df.columns:
                 df[col] = df[col].round(4)

     return df.drop(columns=["user_alias", "anonymous"], errors="ignore")
 _ACTIVITY_EMPTY = pd.DataFrame(
     columns=[
     Args:
         df: Raw leaderboard DataFrame from S3.
+        for_download: If True, keep mean and std as separate columns with full
+            precision for the downloadable CSV. If False (default), drop std
+            columns and round means to 4 dp for the live leaderboard.
     Returns:
         Prepared DataFrame.
     """
     df = df.sort_values("RAE_mean", ascending=True).reset_index(drop=True)
+    rename_map = {
+        "MAE_mean": "MAE",
+        "RAE_mean": "RAE",
+        "R2_mean": "R2",
+        "Spearman_R_mean": "Spearman ρ",
+        "Kendall's_Tau_mean": "Kendall's τ",
+        "MAE_std": "MAE std",
+        "RAE_std": "RAE std",
+        "R2_std": "R2 std",
+        "Spearman_R_std": "Spearman ρ std",
+        "Kendall's_Tau_std": "Kendall's τ std",
+        "submitted_at": "Submitted",
+    }
+    df = df.rename(columns=rename_map)
     if for_download:
+        pass  # keep mean and std columns as-is with full precision
     else:
+        std_cols = [c for c in df.columns if c.endswith(" std")]
         df = df.drop(columns=std_cols)
         for col in ["MAE", "RAE", "R2", "Spearman ρ", "Kendall's τ"]:
             if col in df.columns:
                 df[col] = df[col].round(4)
     """
     df = df.sort_values("LDDT-PLI_mean", ascending=False).reset_index(drop=True)
+    rename_map = {
+        "LDDT-PLI_mean": "LDDT-PLI",
+        "BiSyRMSD_mean": "BiSyRMSD",
+        "LDDT-LP_mean": "LDDT-LP",
+        "Ligand_RMSD_mean": "Ligand RMSD",
+        "coverage_mean": "Coverage",
+        "LDDT-PLI_std": "LDDT-PLI std",
+        "BiSyRMSD_std": "BiSyRMSD std",
+        "LDDT-LP_std": "LDDT-LP std",
+        "Ligand_RMSD_std": "Ligand RMSD std",
+        "coverage_std": "Coverage std",
+    }
+    df = df.rename(columns=rename_map)
     if for_download:
+        pass  # keep mean and std columns as-is with full precision
     else:
+        std_cols = [c for c in df.columns if c.endswith(" std")]
         df = df.drop(columns=std_cols)
         for col in ["LDDT-PLI", "BiSyRMSD", "LDDT-LP", "Ligand RMSD", "Coverage"]:
             if col in df.columns:
                 df[col] = df[col].round(4)