hmacdope Claude Sonnet 4.6 committed on
Commit
4738a52
·
1 Parent(s): 420eddb

fix: download CSV shows mean and std as separate columns

Browse files

Replaces the XX±YY collapsed format with plain mean and std columns
(e.g. MAE / MAE std) for easier programmatic use. Removes now-dead
_collapse_mean_std and _fmt_metric helpers.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +34 -66
app.py CHANGED
@@ -47,35 +47,6 @@ def hide_username_for_anonymous_entries(df: pd.DataFrame) -> pd.DataFrame:
47
  return df.drop(columns=["user_alias", "anonymous"], errors="ignore")
48
 
49
 
50
- def _fmt_metric(v: float) -> str:
51
- """Format a metric value, returning '—' for NaN."""
52
- if pd.isna(v):
53
- return "—"
54
- return f"{v:.4f}"
55
-
56
-
57
- def _collapse_mean_std(df: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
58
- """Combine *_mean / *_std column pairs into a single 'XX±YY' string column.
59
-
60
- Args:
61
- df: Leaderboard dataframe with <metric>_mean and <metric>_std columns.
62
- metrics: List of metric base names to collapse.
63
-
64
- Returns:
65
- DataFrame with combined columns replacing the original pairs.
66
-
67
- """
68
- df = df.copy()
69
- for metric in metrics:
70
- mean_col = f"{metric}_mean"
71
- std_col = f"{metric}_std"
72
- if mean_col in df.columns and std_col in df.columns:
73
- df[metric] = (
74
- df[mean_col].map(_fmt_metric) + "±" + df[std_col].map(_fmt_metric)
75
- )
76
- df = df.drop(columns=[mean_col, std_col])
77
- return df
78
-
79
 
80
  _ACTIVITY_EMPTY = pd.DataFrame(
81
  columns=[
@@ -111,37 +82,34 @@ def _prepare_activity_df(df: pd.DataFrame, for_download: bool = False) -> pd.Dat
111
 
112
  Args:
113
  df: Raw leaderboard DataFrame from S3.
114
- for_download: If True, collapse mean/std pairs into 'XX±YY' strings for
115
- the downloadable CSV. If False (default), keep mean values as plain
116
- floats for numeric sorting in the live leaderboard.
117
 
118
  Returns:
119
  Prepared DataFrame.
120
 
121
  """
122
  df = df.sort_values("RAE_mean", ascending=True).reset_index(drop=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  if for_download:
124
- df = _collapse_mean_std(df, ["MAE", "RAE", "R2", "Spearman_R", "Kendall's_Tau"])
125
- df = df.rename(
126
- columns={
127
- "Spearman_R": "Spearman ρ",
128
- "Kendall's_Tau": "Kendall's τ",
129
- "submitted_at": "Submitted",
130
- }
131
- )
132
  else:
133
- std_cols = [c for c in df.columns if c.endswith("_std")]
134
  df = df.drop(columns=std_cols)
135
- df = df.rename(
136
- columns={
137
- "MAE_mean": "MAE",
138
- "RAE_mean": "RAE",
139
- "R2_mean": "R2",
140
- "Spearman_R_mean": "Spearman ρ",
141
- "Kendall's_Tau_mean": "Kendall's τ",
142
- "submitted_at": "Submitted",
143
- }
144
- )
145
  for col in ["MAE", "RAE", "R2", "Spearman ρ", "Kendall's τ"]:
146
  if col in df.columns:
147
  df[col] = df[col].round(4)
@@ -171,24 +139,24 @@ def _prepare_structure_df(df: pd.DataFrame, for_download: bool = False) -> pd.Da
171
 
172
  """
173
  df = df.sort_values("LDDT-PLI_mean", ascending=False).reset_index(drop=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  if for_download:
175
- df = _collapse_mean_std(df, ["LDDT-PLI", "BiSyRMSD", "LDDT-LP", "Ligand_RMSD"])
176
- df["Coverage"] = (
177
- df["coverage_mean"].map(_fmt_metric) if "coverage_mean" in df.columns else "—"
178
- )
179
- df = df.drop(columns=["coverage_mean"], errors="ignore")
180
  else:
181
- std_cols = [c for c in df.columns if c.endswith("_std")]
182
  df = df.drop(columns=std_cols)
183
- df = df.rename(
184
- columns={
185
- "LDDT-PLI_mean": "LDDT-PLI",
186
- "BiSyRMSD_mean": "BiSyRMSD",
187
- "LDDT-LP_mean": "LDDT-LP",
188
- "Ligand_RMSD_mean": "Ligand RMSD",
189
- "coverage_mean": "Coverage",
190
- }
191
- )
192
  for col in ["LDDT-PLI", "BiSyRMSD", "LDDT-LP", "Ligand RMSD", "Coverage"]:
193
  if col in df.columns:
194
  df[col] = df[col].round(4)
 
47
  return df.drop(columns=["user_alias", "anonymous"], errors="ignore")
48
 
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  _ACTIVITY_EMPTY = pd.DataFrame(
52
  columns=[
 
82
 
83
  Args:
84
  df: Raw leaderboard DataFrame from S3.
85
+ for_download: If True, keep mean and std as separate columns with full
86
+ precision for the downloadable CSV. If False (default), drop std
87
+ columns and round means to 4 dp for the live leaderboard.
88
 
89
  Returns:
90
  Prepared DataFrame.
91
 
92
  """
93
  df = df.sort_values("RAE_mean", ascending=True).reset_index(drop=True)
94
+ rename_map = {
95
+ "MAE_mean": "MAE",
96
+ "RAE_mean": "RAE",
97
+ "R2_mean": "R2",
98
+ "Spearman_R_mean": "Spearman ρ",
99
+ "Kendall's_Tau_mean": "Kendall's τ",
100
+ "MAE_std": "MAE std",
101
+ "RAE_std": "RAE std",
102
+ "R2_std": "R2 std",
103
+ "Spearman_R_std": "Spearman ρ std",
104
+ "Kendall's_Tau_std": "Kendall's τ std",
105
+ "submitted_at": "Submitted",
106
+ }
107
+ df = df.rename(columns=rename_map)
108
  if for_download:
109
+ pass # keep mean and std columns as-is with full precision
 
 
 
 
 
 
 
110
  else:
111
+ std_cols = [c for c in df.columns if c.endswith(" std")]
112
  df = df.drop(columns=std_cols)
 
 
 
 
 
 
 
 
 
 
113
  for col in ["MAE", "RAE", "R2", "Spearman ρ", "Kendall's τ"]:
114
  if col in df.columns:
115
  df[col] = df[col].round(4)
 
139
 
140
  """
141
  df = df.sort_values("LDDT-PLI_mean", ascending=False).reset_index(drop=True)
142
+ rename_map = {
143
+ "LDDT-PLI_mean": "LDDT-PLI",
144
+ "BiSyRMSD_mean": "BiSyRMSD",
145
+ "LDDT-LP_mean": "LDDT-LP",
146
+ "Ligand_RMSD_mean": "Ligand RMSD",
147
+ "coverage_mean": "Coverage",
148
+ "LDDT-PLI_std": "LDDT-PLI std",
149
+ "BiSyRMSD_std": "BiSyRMSD std",
150
+ "LDDT-LP_std": "LDDT-LP std",
151
+ "Ligand_RMSD_std": "Ligand RMSD std",
152
+ "coverage_std": "Coverage std",
153
+ }
154
+ df = df.rename(columns=rename_map)
155
  if for_download:
156
+ pass # keep mean and std columns as-is with full precision
 
 
 
 
157
  else:
158
+ std_cols = [c for c in df.columns if c.endswith(" std")]
159
  df = df.drop(columns=std_cols)
 
 
 
 
 
 
 
 
 
160
  for col in ["LDDT-PLI", "BiSyRMSD", "LDDT-LP", "Ligand RMSD", "Coverage"]:
161
  if col in df.columns:
162
  df[col] = df[col].round(4)