"""GraphTestbed task → mle-bench-shaped data tree. mle-bench expects, per experiment ID: //prepared/public/{train.csv,test.csv,description.md,sample_submission.csv} GraphTestbed's test labels live only on the scoring server, so the agent cannot be auto-scored against `test_features.csv` locally. v1 strategy: - Stage `val_features.csv` (with labels) as the "test" the agent searches against. MLEvolve's grader can score val predictions locally, which is what drives MCGS exploration. - Stash the real `test_features.csv` next to the staged tree as `//REAL_TEST_FEATURES.csv` so users can re-execute the best runfile.py against it after the search finishes. This is documented as a known limitation in agents/mlevolve/README.md. """ from __future__ import annotations from pathlib import Path import pandas as pd from agents.common.tasks import task_instruction from graphtestbed._manifest import task_config from graphtestbed.fetch import cache_dir def stage(task: str, root: Path) -> Path: """Build //prepared/{public,private}/. Return the prepared dir.""" cfg = task_config(task) s = cfg["submission_schema"] src = cache_dir() / task if not src.exists(): raise SystemExit( f"No cached dataset at {src}. Run `gtb fetch {task}` first." ) base = root / task / "prepared" pub = base / "public" priv = base / "private" pub.mkdir(parents=True, exist_ok=True) priv.mkdir(parents=True, exist_ok=True) train = pd.read_csv(src / "train_features.csv") val = pd.read_csv(src / "val_features.csv") test = pd.read_csv(src / "test_features.csv") if s["pred_col"] not in val.columns: raise SystemExit( f"val_features.csv has no `{s['pred_col']}` column — cannot use " f"val as the local-grading split for task {task}." ) # Public tree (what the agent sees). val_no_label = val minus label → # served as `test.csv` so the agent's runfile predicts on it. val_no_label = val.drop(columns=[s["pred_col"]]) train.to_csv(pub / "train.csv", index=False) val_no_label.to_csv(pub / "test.csv", index=False) sample = val_no_label[[s["id_col"]]].copy() sample[s["pred_col"]] = 0.5 sample.to_csv(pub / "sample_submission.csv", index=False) (pub / "description.md").write_text(task_instruction(task)) # Private tree: val with labels — the local grader checks submission # against this. val[[s["id_col"], s["pred_col"]]].rename( columns={s["pred_col"]: "Label"} ).to_csv(priv / "test.csv", index=False) # Stash the real test set for post-search re-execution by the user. test.to_csv(root / task / "REAL_TEST_FEATURES.csv", index=False) # Forward any additional task data files declared in the manifest (graph # edges, relation tables, …) into the public tree so the agent can build # a real graph model instead of treating the task as pure tabular. canonical = {"train_features.csv", "val_features.csv", "test_features.csv", "sample_submission.csv"} for spec in cfg["files"].values(): fn = spec["filename"] if fn in canonical: continue src_path = src / fn if src_path.exists(): (pub / fn).write_bytes(src_path.read_bytes()) return base