Zhu Jiajun (jz28583) Claude Opus 4.7 (1M context) committed on
Commit
769f2a4
·
1 Parent(s): 87f5650

binary check: keep float dtype during validation, reject non-{0,1}

Browse files

The old check cast to int first, which silently truncated probabilities
(e.g. 0.0007 → 0), so probability CSVs slipped past the binary gate. Keep
the values as floats and require exact equality with 0.0 or 1.0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (2) hide show
  1. graphtestbed/submit.py +7 -5
  2. server/api.py +5 -5
graphtestbed/submit.py CHANGED
@@ -61,15 +61,17 @@ def validate_submission(task: str, csv_path: Path) -> dict:
61
  if (preds < 0).any() or (preds > 1).any():
62
  raise SystemExit("predictions must lie in [0, 1]")
63
  elif dtype == "binary":
 
64
  try:
65
- preds = df[s["pred_col"]].astype(int)
66
  except (TypeError, ValueError) as e:
67
- raise SystemExit(f"pred_col not int-castable: {e}")
68
- bad = ~preds.isin([0, 1])
69
  if bad.any():
70
  raise SystemExit(
71
- f"binary submission must contain only 0 or 1; "
72
- f"got {int(bad.sum())} other values"
 
73
  )
74
 
75
  return {
 
61
  if (preds < 0).any() or (preds > 1).any():
62
  raise SystemExit("predictions must lie in [0, 1]")
63
  elif dtype == "binary":
64
+ raw = df[s["pred_col"]]
65
  try:
66
+ preds = raw.astype(float)
67
  except (TypeError, ValueError) as e:
68
+ raise SystemExit(f"pred_col not numeric: {e}")
69
+ bad = ~preds.isin([0.0, 1.0])
70
  if bad.any():
71
  raise SystemExit(
72
+ f"binary submission must contain only 0 or 1 "
73
+ f"(no probabilities); got {int(bad.sum())} other values "
74
+ f"(e.g. {preds[bad].iloc[0]!r})"
75
  )
76
 
77
  return {
server/api.py CHANGED
@@ -221,14 +221,14 @@ def _validate_schema(sub_df: pd.DataFrame, cfg: dict) -> None:
221
  raise ValueError("predictions must lie in [0, 1]")
222
  elif dtype == "binary":
223
  try:
224
- preds = sub_df[s["pred_col"]].astype(int)
225
  except (TypeError, ValueError) as e:
226
- raise ValueError(f"pred_col not int-castable: {e}")
227
- bad = ~preds.isin([0, 1])
228
  if bad.any():
229
  raise ValueError(
230
- f"binary submission must contain only 0 or 1; "
231
- f"got {int(bad.sum())} other values"
232
  )
233
 
234
 
 
221
  raise ValueError("predictions must lie in [0, 1]")
222
  elif dtype == "binary":
223
  try:
224
+ preds = sub_df[s["pred_col"]].astype(float)
225
  except (TypeError, ValueError) as e:
226
+ raise ValueError(f"pred_col not numeric: {e}")
227
+ bad = ~preds.isin([0.0, 1.0])
228
  if bad.any():
229
  raise ValueError(
230
+ f"binary submission must contain only 0 or 1 "
231
+ f"(no probabilities); got {int(bad.sum())} other values"
232
  )
233
 
234