Spaces:
Running
Running
fix: download CSV shows mean and std as separate columns
Browse files
Replaces the XX±YY collapsed format with plain mean and std columns
(e.g. MAE / MAE std) for easier programmatic use. Removes now-dead
_collapse_mean_std and _fmt_metric helpers.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
app.py
CHANGED
|
@@ -47,35 +47,6 @@ def hide_username_for_anonymous_entries(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 47 |
return df.drop(columns=["user_alias", "anonymous"], errors="ignore")
|
| 48 |
|
| 49 |
|
| 50 |
-
def _fmt_metric(v: float) -> str:
|
| 51 |
-
"""Format a metric value, returning '—' for NaN."""
|
| 52 |
-
if pd.isna(v):
|
| 53 |
-
return "—"
|
| 54 |
-
return f"{v:.4f}"
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
def _collapse_mean_std(df: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
|
| 58 |
-
"""Combine *_mean / *_std column pairs into a single 'XX±YY' string column.
|
| 59 |
-
|
| 60 |
-
Args:
|
| 61 |
-
df: Leaderboard dataframe with <metric>_mean and <metric>_std columns.
|
| 62 |
-
metrics: List of metric base names to collapse.
|
| 63 |
-
|
| 64 |
-
Returns:
|
| 65 |
-
DataFrame with combined columns replacing the original pairs.
|
| 66 |
-
|
| 67 |
-
"""
|
| 68 |
-
df = df.copy()
|
| 69 |
-
for metric in metrics:
|
| 70 |
-
mean_col = f"{metric}_mean"
|
| 71 |
-
std_col = f"{metric}_std"
|
| 72 |
-
if mean_col in df.columns and std_col in df.columns:
|
| 73 |
-
df[metric] = (
|
| 74 |
-
df[mean_col].map(_fmt_metric) + "±" + df[std_col].map(_fmt_metric)
|
| 75 |
-
)
|
| 76 |
-
df = df.drop(columns=[mean_col, std_col])
|
| 77 |
-
return df
|
| 78 |
-
|
| 79 |
|
| 80 |
_ACTIVITY_EMPTY = pd.DataFrame(
|
| 81 |
columns=[
|
|
@@ -111,37 +82,34 @@ def _prepare_activity_df(df: pd.DataFrame, for_download: bool = False) -> pd.Dat
|
|
| 111 |
|
| 112 |
Args:
|
| 113 |
df: Raw leaderboard DataFrame from S3.
|
| 114 |
-
for_download: If True,
|
| 115 |
-
the downloadable CSV. If False (default),
|
| 116 |
-
|
| 117 |
|
| 118 |
Returns:
|
| 119 |
Prepared DataFrame.
|
| 120 |
|
| 121 |
"""
|
| 122 |
df = df.sort_values("RAE_mean", ascending=True).reset_index(drop=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
if for_download:
|
| 124 |
-
|
| 125 |
-
df = df.rename(
|
| 126 |
-
columns={
|
| 127 |
-
"Spearman_R": "Spearman ρ",
|
| 128 |
-
"Kendall's_Tau": "Kendall's τ",
|
| 129 |
-
"submitted_at": "Submitted",
|
| 130 |
-
}
|
| 131 |
-
)
|
| 132 |
else:
|
| 133 |
-
std_cols = [c for c in df.columns if c.endswith("
|
| 134 |
df = df.drop(columns=std_cols)
|
| 135 |
-
df = df.rename(
|
| 136 |
-
columns={
|
| 137 |
-
"MAE_mean": "MAE",
|
| 138 |
-
"RAE_mean": "RAE",
|
| 139 |
-
"R2_mean": "R2",
|
| 140 |
-
"Spearman_R_mean": "Spearman ρ",
|
| 141 |
-
"Kendall's_Tau_mean": "Kendall's τ",
|
| 142 |
-
"submitted_at": "Submitted",
|
| 143 |
-
}
|
| 144 |
-
)
|
| 145 |
for col in ["MAE", "RAE", "R2", "Spearman ρ", "Kendall's τ"]:
|
| 146 |
if col in df.columns:
|
| 147 |
df[col] = df[col].round(4)
|
|
@@ -171,24 +139,24 @@ def _prepare_structure_df(df: pd.DataFrame, for_download: bool = False) -> pd.Da
|
|
| 171 |
|
| 172 |
"""
|
| 173 |
df = df.sort_values("LDDT-PLI_mean", ascending=False).reset_index(drop=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
if for_download:
|
| 175 |
-
|
| 176 |
-
df["Coverage"] = (
|
| 177 |
-
df["coverage_mean"].map(_fmt_metric) if "coverage_mean" in df.columns else "—"
|
| 178 |
-
)
|
| 179 |
-
df = df.drop(columns=["coverage_mean"], errors="ignore")
|
| 180 |
else:
|
| 181 |
-
std_cols = [c for c in df.columns if c.endswith("
|
| 182 |
df = df.drop(columns=std_cols)
|
| 183 |
-
df = df.rename(
|
| 184 |
-
columns={
|
| 185 |
-
"LDDT-PLI_mean": "LDDT-PLI",
|
| 186 |
-
"BiSyRMSD_mean": "BiSyRMSD",
|
| 187 |
-
"LDDT-LP_mean": "LDDT-LP",
|
| 188 |
-
"Ligand_RMSD_mean": "Ligand RMSD",
|
| 189 |
-
"coverage_mean": "Coverage",
|
| 190 |
-
}
|
| 191 |
-
)
|
| 192 |
for col in ["LDDT-PLI", "BiSyRMSD", "LDDT-LP", "Ligand RMSD", "Coverage"]:
|
| 193 |
if col in df.columns:
|
| 194 |
df[col] = df[col].round(4)
|
|
|
|
| 47 |
return df.drop(columns=["user_alias", "anonymous"], errors="ignore")
|
| 48 |
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
_ACTIVITY_EMPTY = pd.DataFrame(
|
| 52 |
columns=[
|
|
|
|
| 82 |
|
| 83 |
Args:
|
| 84 |
df: Raw leaderboard DataFrame from S3.
|
| 85 |
+
for_download: If True, keep mean and std as separate columns with full
|
| 86 |
+
precision for the downloadable CSV. If False (default), drop std
|
| 87 |
+
columns and round means to 4 dp for the live leaderboard.
|
| 88 |
|
| 89 |
Returns:
|
| 90 |
Prepared DataFrame.
|
| 91 |
|
| 92 |
"""
|
| 93 |
df = df.sort_values("RAE_mean", ascending=True).reset_index(drop=True)
|
| 94 |
+
rename_map = {
|
| 95 |
+
"MAE_mean": "MAE",
|
| 96 |
+
"RAE_mean": "RAE",
|
| 97 |
+
"R2_mean": "R2",
|
| 98 |
+
"Spearman_R_mean": "Spearman ρ",
|
| 99 |
+
"Kendall's_Tau_mean": "Kendall's τ",
|
| 100 |
+
"MAE_std": "MAE std",
|
| 101 |
+
"RAE_std": "RAE std",
|
| 102 |
+
"R2_std": "R2 std",
|
| 103 |
+
"Spearman_R_std": "Spearman ρ std",
|
| 104 |
+
"Kendall's_Tau_std": "Kendall's τ std",
|
| 105 |
+
"submitted_at": "Submitted",
|
| 106 |
+
}
|
| 107 |
+
df = df.rename(columns=rename_map)
|
| 108 |
if for_download:
|
| 109 |
+
pass # keep mean and std columns as-is with full precision
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
else:
|
| 111 |
+
std_cols = [c for c in df.columns if c.endswith(" std")]
|
| 112 |
df = df.drop(columns=std_cols)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
for col in ["MAE", "RAE", "R2", "Spearman ρ", "Kendall's τ"]:
|
| 114 |
if col in df.columns:
|
| 115 |
df[col] = df[col].round(4)
|
|
|
|
| 139 |
|
| 140 |
"""
|
| 141 |
df = df.sort_values("LDDT-PLI_mean", ascending=False).reset_index(drop=True)
|
| 142 |
+
rename_map = {
|
| 143 |
+
"LDDT-PLI_mean": "LDDT-PLI",
|
| 144 |
+
"BiSyRMSD_mean": "BiSyRMSD",
|
| 145 |
+
"LDDT-LP_mean": "LDDT-LP",
|
| 146 |
+
"Ligand_RMSD_mean": "Ligand RMSD",
|
| 147 |
+
"coverage_mean": "Coverage",
|
| 148 |
+
"LDDT-PLI_std": "LDDT-PLI std",
|
| 149 |
+
"BiSyRMSD_std": "BiSyRMSD std",
|
| 150 |
+
"LDDT-LP_std": "LDDT-LP std",
|
| 151 |
+
"Ligand_RMSD_std": "Ligand RMSD std",
|
| 152 |
+
"coverage_std": "Coverage std",
|
| 153 |
+
}
|
| 154 |
+
df = df.rename(columns=rename_map)
|
| 155 |
if for_download:
|
| 156 |
+
pass # keep mean and std columns as-is with full precision
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
else:
|
| 158 |
+
std_cols = [c for c in df.columns if c.endswith(" std")]
|
| 159 |
df = df.drop(columns=std_cols)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
for col in ["LDDT-PLI", "BiSyRMSD", "LDDT-LP", "Ligand RMSD", "Coverage"]:
|
| 161 |
if col in df.columns:
|
| 162 |
df[col] = df[col].round(4)
|