0309359 | """GraphTestbed task β mle-bench-shaped data tree. | |
| mle-bench expects, per experiment ID: | |
| <root>/<exp_id>/prepared/public/{train.csv,test.csv,description.md,sample_submission.csv} | |
| GraphTestbed's test labels live only on the scoring server, so the agent | |
| cannot be auto-scored against `test_features.csv` locally. v1 strategy: | |
| - Stage `val_features.csv` (with labels) as the "test" the agent | |
| searches against. MLEvolve's grader can score val predictions locally, | |
| which is what drives MCGS exploration. | |
| - Stash the real `test_features.csv` next to the staged tree as | |
| `<root>/<exp_id>/REAL_TEST_FEATURES.csv` so users can re-execute the | |
| best runfile.py against it after the search finishes. | |
| This is documented as a known limitation in agents/mlevolve/README.md. | |
| """ | |
from __future__ import annotations

import shutil
from pathlib import Path

import pandas as pd

from agents.common.tasks import task_instruction
from graphtestbed._manifest import task_config
from graphtestbed.fetch import cache_dir


def stage(task: str, root: Path) -> Path:
    """Build <root>/<task>/prepared/{public,private}/. Return the prepared dir."""
    cfg = task_config(task)
    s = cfg["submission_schema"]
    src = cache_dir() / task
    if not src.exists():
        raise SystemExit(
            f"No cached dataset at {src}. Run `gtb fetch {task}` first."
        )

    base = root / task / "prepared"
    pub = base / "public"
    priv = base / "private"
    pub.mkdir(parents=True, exist_ok=True)
    priv.mkdir(parents=True, exist_ok=True)

    train = pd.read_csv(src / "train_features.csv")
    val = pd.read_csv(src / "val_features.csv")
    test = pd.read_csv(src / "test_features.csv")
    if s["pred_col"] not in val.columns:
        raise SystemExit(
            f"val_features.csv has no `{s['pred_col']}` column; cannot use "
            f"val as the local-grading split for task {task}."
        )

    # Public tree (what the agent sees). val_no_label = val minus its label
    # column, served as `test.csv` so the agent's runfile predicts on it.
    val_no_label = val.drop(columns=[s["pred_col"]])
    train.to_csv(pub / "train.csv", index=False)
    val_no_label.to_csv(pub / "test.csv", index=False)
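    # sample_submission: id column plus a constant placeholder prediction.
    # (The 0.5 assumes a probability-style pred_col; the stub only needs to
    # be schema-valid, not accurate.)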
    sample = val_no_label[[s["id_col"]]].copy()
    sample[s["pred_col"]] = 0.5
    sample.to_csv(pub / "sample_submission.csv", index=False)
    (pub / "description.md").write_text(task_instruction(task))

    # Private tree: val with labels; the local grader checks the agent's
    # submission against this.
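    # (The rename below assumes the grader keys on a literal `Label` column.)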
| val[[s["id_col"], s["pred_col"]]].rename( | |
| columns={s["pred_col"]: "Label"} | |
| ).to_csv(priv / "test.csv", index=False) | |
| # Stash the real test set for post-search re-execution by the user. | |
| test.to_csv(root / task / "REAL_TEST_FEATURES.csv", index=False) | |

    # Forward any additional task data files declared in the manifest (graph
    # edges, relation tables, ...) into the public tree so the agent can build
    # a real graph model instead of treating the task as pure tabular.
    canonical = {"train_features.csv", "val_features.csv",
                 "test_features.csv", "sample_submission.csv"}
    for spec in cfg["files"].values():
        fn = spec["filename"]
        if fn in canonical:
            continue
        src_path = src / fn
        if src_path.exists():
            shutil.copyfile(src_path, pub / fn)
    return base
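

# Minimal usage sketch. Assumption: the real entry point is the `gtb` CLI /
# MLEvolve harness elsewhere in the repo; this inline runner exists only to
# illustrate the call shape.
if __name__ == "__main__":
    import sys

    if len(sys.argv) != 3:
        raise SystemExit("usage: python <this file> <task> <root>")
    print(stage(sys.argv[1], Path(sys.argv[2])))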