| from __future__ import annotations
|
|
|
| import argparse
|
| import os
|
| import re
|
| import sys
|
|
|
| import pandas as pd
|
|
|
|
|
# Column layout required of the submission file, in this exact order.
_REQUIRED_COLUMNS = ["candidate_id", "rank", "score", "reasoning"]

# Number of rows a submission must contain; ranks must be exactly 1..N.
_EXPECTED_ROWS = 100

# Accepted candidate_id shapes: "CAND_" + 7 digits, or "SYNTH_" + an
# upper-case/underscore tag.  Used with fullmatch so that nothing (not even a
# trailing newline) may follow the id.
_CANDIDATE_ID_RE = re.compile(r"(CAND_\d{7}|SYNTH_[A-Z_]+)")


def _rank_errors(ranks):
    """Return error messages for the rank column (each of 1..N exactly once)."""
    numeric = pd.to_numeric(ranks, errors="coerce").astype("float64")

    # NaN % 1 and inf % 1 are both NaN, and NaN != 0 is True, so this single
    # test rejects missing, non-numeric, infinite and fractional ranks.
    # (A plain int() conversion would truncate 1.5 to 1 and raise
    # OverflowError on inf.)
    not_integral = numeric % 1 != 0
    if not_integral.any():
        examples = ranks[not_integral].tolist()[:5]
        return [f"Rank column contains non-integer values: {examples}"]

    rank_list = [int(value) for value in numeric.tolist()]
    seen = set(rank_list)
    expected = set(range(1, _EXPECTED_ROWS + 1))

    problems = []
    missing_ranks = expected - seen
    if missing_ranks:
        problems.append(f"Missing ranks: {sorted(missing_ranks)[:10]}")
    extra_ranks = seen - expected
    if extra_ranks:
        problems.append(
            f"Invalid ranks (out of 1–{_EXPECTED_ROWS}): {sorted(extra_ranks)[:10]}"
        )
    if len(rank_list) != len(seen):
        problems.append("Duplicate ranks found")
    return problems


def _score_errors(raw_scores):
    """Return error messages for the score column (numeric, within [0, 1])."""
    try:
        scores = pd.to_numeric(raw_scores, errors="raise")
    except (TypeError, ValueError) as exc:
        return [f"Score column contains non-numeric values: {exc}"]

    if scores.isna().any():
        return ["Score column contains NaN values"]

    problems = []
    if scores.min() < 0:
        problems.append(f"Score below 0: min={scores.min():.6f}")
    # The upper bound carries a small tolerance (1.0001), the lower bound none.
    if scores.max() > 1.0001:
        problems.append(f"Score above 1: max={scores.max():.6f}")
    return problems


def _monotonicity_errors(df):
    """Return an error if scores increase anywhere when rows are ordered by rank."""
    try:
        ordered = df.assign(
            rank_int=pd.to_numeric(df["rank"], errors="coerce")
        ).sort_values("rank_int")
        score_vals = pd.to_numeric(ordered["score"], errors="coerce").to_numpy()

        # Positions are 1-based, so "rank i → i+1" labels the i-th and
        # (i+1)-th rows of the rank-ordered file.  1e-9 absorbs float noise.
        violations = [
            f"rank {pos} → {pos + 1}: {prev:.6f} → {cur:.6f}"
            for pos, (prev, cur) in enumerate(
                zip(score_vals, score_vals[1:]), start=1
            )
            if cur > prev + 1e-9
        ]
    except Exception as exc:
        # Best-effort check: report the failure as a validation error
        # instead of crashing the validator.
        return [f"Could not check monotonicity: {exc}"]

    if not violations:
        return []
    return [
        f"Monotonicity violated at {len(violations)} positions: "
        f"{violations[:3]}"
    ]


def validate_submission(submission_path: str) -> bool:
    """Run all format validation checks on a submission CSV.

    Checks, in order: the file exists and parses as CSV; the columns are
    exactly ``candidate_id, rank, score, reasoning`` in that order; there are
    exactly 100 rows; ranks are the integers 1..100 with no duplicates;
    scores are numeric, non-NaN and within [0, 1] (upper bound has a 1e-4
    tolerance); scores never increase as rank increases; candidate_ids are
    non-null, unique and free of surrounding whitespace; reasoning is
    non-null and non-blank.  Unusual candidate_id formats and reasoning
    shorter than 20 characters only produce warnings.

    A detailed report is printed to stdout.

    Args:
        submission_path: Path of the CSV file to validate.

    Returns:
        True if no errors were found (warnings are allowed), False otherwise.
    """
    errors = []
    warnings = []

    banner = "=" * 60
    print(banner)
    print("SUBMISSION VALIDATOR")
    print(f"File: {submission_path}")
    print(banner)

    if not os.path.isfile(submission_path):
        print(f"\n[FAIL] File not found: {submission_path}")
        return False

    try:
        # Force the text columns to str so ids keep leading zeros.
        df = pd.read_csv(
            submission_path, dtype={"candidate_id": str, "reasoning": str}
        )
    except Exception as exc:
        # Any parser / encoding / IO failure means the file is not a valid
        # submission; report it rather than propagate.
        print(f"\n[FAIL] Cannot parse CSV: {exc}")
        return False

    print(f"\nParsed: {len(df)} rows × {len(df.columns)} columns")

    # --- Columns: the remaining checks need them, so stop early if wrong. ---
    actual_cols = list(df.columns)
    if actual_cols != _REQUIRED_COLUMNS:
        missing = set(_REQUIRED_COLUMNS) - set(actual_cols)
        extra = set(actual_cols) - set(_REQUIRED_COLUMNS)
        if missing:
            errors.append(f"Missing columns: {sorted(missing)}")
        if extra:
            errors.append(f"Extra columns (not allowed): {sorted(extra)}")
        if not missing and not extra:
            # Same set of names, different sequence.
            errors.append(
                f"Column order wrong. Expected: {_REQUIRED_COLUMNS}, "
                f"Got: {actual_cols}"
            )
        for message in errors:
            print(f"[FAIL] {message}")
        return False

    # --- Row count, ranks, scores, ordering. ---
    if len(df) != _EXPECTED_ROWS:
        errors.append(f"Expected exactly {_EXPECTED_ROWS} rows, got {len(df)}")

    errors.extend(_rank_errors(df["rank"]))
    errors.extend(_score_errors(df["score"]))
    errors.extend(_monotonicity_errors(df))

    # --- candidate_id: nulls, duplicates, format. ---
    ids = df["candidate_id"]
    # Null ids are reported once below; exclude them from the string checks
    # so they are not also flagged as malformed or as having whitespace
    # (NaN != NaN would otherwise count as a whitespace difference).
    present_ids = ids.dropna()

    if ids.isna().any():
        errors.append("candidate_id column contains NaN values")
    elif ids.duplicated().any():
        dups = ids[ids.duplicated()].tolist()
        errors.append(f"Duplicate candidate_ids: {dups[:5]}")

    bad_format = [
        cid for cid in present_ids
        if not _CANDIDATE_ID_RE.fullmatch(str(cid))
    ]
    if bad_format:
        warnings.append(
            f"{len(bad_format)} candidate_ids don't match CAND_XXXXXXX format: "
            f"{bad_format[:3]}"
        )

    # --- reasoning: null, blank, suspiciously short. ---
    reasoning = df["reasoning"]
    is_null = reasoning.isna()
    if is_null.any():
        errors.append(f"{is_null.sum()} reasoning fields are null")

    reasoning_text = reasoning.fillna("")
    # Count only non-null whitespace-only fields here; nulls were reported above.
    is_blank = ~is_null & (reasoning_text.str.strip() == "")
    if is_blank.any():
        errors.append(f"{is_blank.sum()} reasoning fields are empty")

    # Fields already reported as null/blank are not repeated as "short".
    is_short = ~is_null & ~is_blank & (reasoning_text.str.len() < 20)
    if is_short.any():
        warnings.append(
            f"{is_short.sum()} reasoning fields are very short (<20 chars)"
        )

    if (present_ids.str.strip() != present_ids).any():
        errors.append("Some candidate_ids have leading/trailing whitespace")

    # --- Report. ---
    print()
    if errors:
        print(f"RESULT: FAIL ({len(errors)} error(s), {len(warnings)} warning(s))\n")
        for message in errors:
            print(f" [FAIL] {message}")
        for message in warnings:
            print(f" [WARN] {message}")
        return False

    print(f"RESULT: PASS (0 errors, {len(warnings)} warning(s))\n")

    scores = pd.to_numeric(df["score"])
    max_rank = int(pd.to_numeric(df["rank"]).max())
    print(f" Rows: {len(df)}")
    print(f" Ranks: 1–{max_rank}")
    print(f" Score range: [{scores.min():.6f}, {scores.max():.6f}]")
    print(f" Avg reasoning length: {df['reasoning'].str.len().mean():.0f} chars")
    print(f" Distinct candidate_ids: {df['candidate_id'].nunique()}")

    for message in warnings:
        print(f"\n [WARN] {message}")

    print("\nSAFE TO SUBMIT [PASS]")
    return True
|
|
|
|
|
def main():
    """Command-line entry point.

    Reads the ``--submission`` option (defaulting to
    ``./CTRL_COFFEE_REPEAT.csv``), validates the file at its absolute path,
    and terminates the process with status 0 when validation passes or 1
    when it fails.
    """
    cli = argparse.ArgumentParser(
        description="Validate submission.csv against the Redrob spec checklist"
    )
    cli.add_argument(
        "--submission",
        default="./CTRL_COFFEE_REPEAT.csv",
        help="Path to CTRL_COFFEE_REPEAT.csv to validate",
    )
    options = cli.parse_args()

    # Resolve to an absolute path before validating.
    target = os.path.abspath(options.submission)
    exit_status = 0 if validate_submission(target) else 1
    sys.exit(exit_status)
|
|
|
|
|
# Run the validator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|
|
|