LordofMonarchs's picture
Upload folder using huggingface_hub
3751f09 verified
Raw
History Blame Contribute Delete
6.65 kB
from __future__ import annotations
import argparse
import os
import re
import sys
import pandas as pd
def validate_submission(submission_path: str, *, expected_rows: int = 100) -> bool:
    """
    Run all format validation checks on a submission CSV.

    Checks, in order: the file exists, the CSV parses, the columns are exactly
    ``candidate_id, rank, score, reasoning`` in that order, the row count
    matches, ranks are the integers 1..expected_rows with no duplicates, scores
    are numeric and within [0, 1] (an overshoot of up to 1e-4 above 1 is
    tolerated), scores never increase when rows are ordered by rank,
    candidate_ids are non-null, unique and free of surrounding whitespace, and
    reasoning fields are non-empty.

    Args:
        submission_path: Path of the CSV file to validate.
        expected_rows: Required number of rows; ranks must be exactly
            1..expected_rows. Defaults to 100.

    Returns:
        True if all checks pass (warnings alone do not fail the run),
        False if any check fails.

    Prints detailed output for each check to stdout.
    """
    errors = []
    warning_msgs = []

    print("=" * 60)
    print("SUBMISSION VALIDATOR")
    print(f"File: {submission_path}")
    print("=" * 60)

    # file existence
    if not os.path.isfile(submission_path):
        print(f"\n[FAIL] File not found: {submission_path}")
        return False

    # Read ids and reasoning as text so numeric-looking values are not coerced.
    try:
        df = pd.read_csv(submission_path, dtype={"candidate_id": str, "reasoning": str})
    except Exception as e:
        # Deliberately broad: any parser/encoding/empty-file failure is
        # reported as a validation failure rather than a crash.
        print(f"\n[FAIL] Cannot parse CSV: {e}")
        return False

    print(f"\nParsed: {len(df)} rows × {len(df.columns)} columns")

    # --- columns: exact names, exact order --------------------------------
    required_cols = ["candidate_id", "rank", "score", "reasoning"]
    actual_cols = list(df.columns)
    if actual_cols != required_cols:
        missing = set(required_cols) - set(actual_cols)
        extra = set(actual_cols) - set(required_cols)
        if missing:
            errors.append(f"Missing columns: {sorted(missing)}")
        if extra:
            errors.append(f"Extra columns (not allowed): {sorted(extra)}")
        if not missing and not extra:
            # Same names, so the only remaining difference is the ordering.
            errors.append(
                f"Column order wrong. Expected: {required_cols}, "
                f"Got: {actual_cols}"
            )

    # The remaining checks index columns by name, so stop here on schema errors.
    if errors:
        for msg in errors:
            print(f"[FAIL] {msg}")
        return False

    # --- row count ---------------------------------------------------------
    if len(df) != expected_rows:
        errors.append(f"Expected exactly {expected_rows} rows, got {len(df)}")

    # --- ranks: the integers 1..expected_rows, each exactly once ----------
    valid_ranks = set(range(1, expected_rows + 1))
    try:
        ranks = df["rank"].tolist()
        rank_set = set()
        fractional = []
        for raw in ranks:
            as_int = int(raw)
            # int() truncates, so 1.5 would otherwise pass as rank 1.
            if float(raw) != as_int:
                fractional.append(raw)
            rank_set.add(as_int)
        if fractional:
            errors.append(
                f"Rank column contains non-integer values: {fractional[:10]}"
            )
        if rank_set != valid_ranks:
            missing_ranks = valid_ranks - rank_set
            extra_ranks = rank_set - valid_ranks
            if missing_ranks:
                errors.append(f"Missing ranks: {sorted(missing_ranks)[:10]}")
            if extra_ranks:
                errors.append(
                    f"Invalid ranks (out of 1–{expected_rows}): "
                    f"{sorted(extra_ranks)[:10]}"
                )
        if len(ranks) != len(set(ranks)):
            errors.append("Duplicate ranks found")
    except (TypeError, ValueError, OverflowError) as e:
        # ValueError: NaN or non-numeric text; OverflowError: +/-infinity.
        errors.append(f"Rank column contains non-integer values: {e}")

    # --- scores: numeric and within [0, 1] --------------------------------
    try:
        scores = pd.to_numeric(df["score"], errors="raise")
        if scores.isna().any():
            errors.append("Score column contains NaN values")
        else:
            if scores.min() < 0:
                errors.append(f"Score below 0: min={scores.min():.6f}")
            if scores.max() > 1.0001:
                errors.append(f"Score above 1: max={scores.max():.6f}")
    except (TypeError, ValueError) as e:
        errors.append(f"Score column contains non-numeric values: {e}")

    # --- monotonicity: score must not increase as rank gets worse ---------
    try:
        by_rank = df.copy()
        by_rank["rank_int"] = pd.to_numeric(by_rank["rank"], errors="coerce")
        by_rank = by_rank.sort_values("rank_int")
        score_vals = pd.to_numeric(by_rank["score"], errors="coerce").values
        violations = []
        for pos in range(1, len(score_vals)):
            prev, cur = score_vals[pos - 1], score_vals[pos]
            # Small epsilon so float round-off on equal scores is not flagged.
            if cur > prev + 1e-9:
                violations.append(
                    f"rank {pos}->{pos + 1}: {prev:.6f}->{cur:.6f}"
                )
        if violations:
            errors.append(
                f"Monotonicity violated at {len(violations)} positions: "
                f"{violations[:3]}"
            )
    except Exception as e:
        # Best-effort check: report instead of aborting the remaining checks.
        errors.append(f"Could not check monotonicity: {e}")

    # --- candidate_id: present, unique, expected format -------------------
    if df["candidate_id"].isna().any():
        errors.append("candidate_id column contains NaN values")
    else:
        dup_mask = df["candidate_id"].duplicated()
        if dup_mask.any():
            dups = df.loc[dup_mask, "candidate_id"].tolist()
            errors.append(f"Duplicate candidate_ids: {dups[:5]}")
        id_pattern = re.compile(r'^(CAND_\d{7}|SYNTH_[A-Z_]+)$')
        bad_format = [
            cid for cid in df["candidate_id"] if not id_pattern.match(str(cid))
        ]
        if bad_format:
            # Format mismatches are only a warning, not a failure.
            warning_msgs.append(
                f"{len(bad_format)} candidate_ids don't match CAND_XXXXXXX format: "
                f"{bad_format[:3]}"
            )

    # --- reasoning: non-null, non-empty, ideally not trivially short ------
    if df["reasoning"].isna().any():
        errors.append(f"{df['reasoning'].isna().sum()} reasoning fields are null")
    reasoning_text = df["reasoning"].fillna("")
    empty_reasoning = reasoning_text.str.strip() == ""
    if empty_reasoning.any():
        errors.append(f"{empty_reasoning.sum()} reasoning fields are empty")
    # check reasonable length (warn if very short)
    short_reasoning = reasoning_text.str.len() < 20
    if short_reasoning.any():
        warning_msgs.append(
            f"{short_reasoning.sum()} reasoning fields are very short (<20 chars)"
        )

    # --- candidate_id whitespace ------------------------------------------
    # Nulls are dropped first: NaN != NaN would otherwise be reported as a
    # whitespace problem on top of the NaN error raised above.
    present_ids = df["candidate_id"].dropna()
    if (present_ids.str.strip() != present_ids).any():
        errors.append("Some candidate_ids have leading/trailing whitespace")

    # --- report ------------------------------------------------------------
    print()
    if errors:
        print(f"RESULT: FAIL ({len(errors)} error(s), {len(warning_msgs)} warning(s))\n")
        for msg in errors:
            print(f" [FAIL] {msg}")
        for msg in warning_msgs:
            print(f" [WARN] {msg}")
        return False

    print(f"RESULT: PASS (0 errors, {len(warning_msgs)} warning(s))\n")
    scores = pd.to_numeric(df["score"])
    print(f" Rows: {len(df)}")
    print(f" Ranks: 1–{int(df['rank'].max())}")
    print(f" Score range: [{scores.min():.6f}, {scores.max():.6f}]")
    print(f" Avg reasoning length: {df['reasoning'].str.len().mean():.0f} chars")
    print(f" Distinct candidate_ids: {df['candidate_id'].nunique()}")
    for msg in warning_msgs:
        print(f"\n [WARN] {msg}")
    print("\nSAFE TO SUBMIT [PASS]")
    return True
def main():
    """
    Command-line entry point.

    Reads the optional ``--submission`` path, validates that file, and exits
    the process with status 0 when validation passes or 1 when it fails.
    """
    cli = argparse.ArgumentParser(
        description="Validate submission.csv against the Redrob spec checklist"
    )
    cli.add_argument(
        "--submission",
        default="./CTRL_COFFEE_REPEAT.csv",
        help="Path to CTRL_COFFEE_REPEAT.csv to validate",
    )
    options = cli.parse_args()

    # Resolve to an absolute path so the validator's report shows the real file.
    target = os.path.abspath(options.submission)
    exit_code = 0 if validate_submission(target) else 1
    sys.exit(exit_code)
# Run the validator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()