graphtestbed/agents/mlevolve/adapter.py
Zhu Jiajun (jz28583)
arxiv-citation: ship the heterograph (citations + author/category tables)
0309359
"""GraphTestbed task β†’ mle-bench-shaped data tree.
mle-bench expects, per experiment ID:
<root>/<exp_id>/prepared/public/{train.csv,test.csv,description.md,sample_submission.csv}
GraphTestbed's test labels live only on the scoring server, so the agent
cannot be auto-scored against `test_features.csv` locally. v1 strategy:
- Stage `val_features.csv` (with labels) as the "test" the agent
searches against. MLEvolve's grader can score val predictions locally,
which is what drives MCGS exploration.
- Stash the real `test_features.csv` next to the staged tree as
`<root>/<exp_id>/REAL_TEST_FEATURES.csv` so users can re-execute the
best runfile.py against it after the search finishes.
This is documented as a known limitation in agents/mlevolve/README.md.
"""
from __future__ import annotations

import shutil
from pathlib import Path

import pandas as pd

from agents.common.tasks import task_instruction
from graphtestbed._manifest import task_config
from graphtestbed.fetch import cache_dir
def stage(task: str, root: Path) -> Path:
    """Stage a GraphTestbed task as an mle-bench-shaped data tree.

    Builds ``<root>/<task>/prepared/{public,private}`` where:

    - ``public/`` is what the agent sees: ``train.csv``, ``test.csv``
      (val features with the label column removed), ``sample_submission.csv``,
      ``description.md``, plus any extra task data files declared in the
      manifest (graph edges, relation tables, ...).
    - ``private/`` holds val *with* labels as ``test.csv``; the local grader
      scores submissions against it.

    The real ``test_features.csv`` is stashed byte-for-byte at
    ``<root>/<task>/REAL_TEST_FEATURES.csv`` for post-search re-execution.

    Args:
        task: GraphTestbed task ID; its dataset must already be cached
            (run ``gtb fetch <task>`` first).
        root: directory under which the staged tree is created.

    Returns:
        The ``<root>/<task>/prepared`` directory.

    Raises:
        SystemExit: if the dataset is not cached, or ``val_features.csv``
            lacks the prediction column needed for local grading.
    """
    cfg = task_config(task)
    s = cfg["submission_schema"]
    src = cache_dir() / task
    if not src.exists():
        raise SystemExit(
            f"No cached dataset at {src}. Run `gtb fetch {task}` first."
        )
    base = root / task / "prepared"
    pub = base / "public"
    priv = base / "private"
    pub.mkdir(parents=True, exist_ok=True)
    priv.mkdir(parents=True, exist_ok=True)
    train = pd.read_csv(src / "train_features.csv")
    val = pd.read_csv(src / "val_features.csv")
    if s["pred_col"] not in val.columns:
        raise SystemExit(
            f"val_features.csv has no `{s['pred_col']}` column — cannot use "
            f"val as the local-grading split for task {task}."
        )
    # Public tree (what the agent sees). val minus its label column is
    # served as `test.csv` so the agent's runfile predicts on it.
    val_no_label = val.drop(columns=[s["pred_col"]])
    train.to_csv(pub / "train.csv", index=False)
    val_no_label.to_csv(pub / "test.csv", index=False)
    # Sample submission: one row per staged-test example, placeholder score.
    sample = val_no_label[[s["id_col"]]].copy()
    sample[s["pred_col"]] = 0.5
    sample.to_csv(pub / "sample_submission.csv", index=False)
    # Explicit encoding: task instructions may contain non-ASCII text and
    # write_text's default encoding is platform-dependent.
    (pub / "description.md").write_text(task_instruction(task), encoding="utf-8")
    # Private tree: val with labels — the local grader checks submissions
    # against this (label column renamed to "Label" as the grader expects).
    val[[s["id_col"], s["pred_col"]]].rename(
        columns={s["pred_col"]: "Label"}
    ).to_csv(priv / "test.csv", index=False)
    # Stash the real test set byte-for-byte. A pandas read/write round-trip
    # could silently alter the file (float formatting, dtype coercion,
    # quoting), and this is the file users re-execute the best runfile on.
    shutil.copyfile(
        src / "test_features.csv", root / task / "REAL_TEST_FEATURES.csv"
    )
    # Forward any additional task data files declared in the manifest (graph
    # edges, relation tables, ...) into the public tree so the agent can build
    # a real graph model instead of treating the task as pure tabular.
    canonical = {
        "train_features.csv",
        "val_features.csv",
        "test_features.csv",
        "sample_submission.csv",
    }
    for spec in cfg["files"].values():
        fn = spec["filename"]
        if fn in canonical:
            continue
        src_path = src / fn
        if src_path.exists():
            # Streamed copy (preserves mtime) — avoids loading potentially
            # large edge/relation files fully into memory.
            shutil.copy2(src_path, pub / fn)
    return base