"""GraphTestbed task → mle-bench-shaped data tree.

mle-bench expects, per experiment ID:

    <root>/<exp_id>/prepared/public/{train.csv,test.csv,description.md,sample_submission.csv}

GraphTestbed's test labels live only on the scoring server, so the agent
cannot be auto-scored against `test_features.csv` locally. v1 strategy:

- Stage `val_features.csv` (with labels) as the "test" the agent
  searches against. MLEvolve's grader can score val predictions locally,
  which is what drives MCGS exploration.
- Stash the real `test_features.csv` next to the staged tree as
  `<root>/<exp_id>/REAL_TEST_FEATURES.csv` so users can re-execute the
  best runfile.py against it after the search finishes.

This is documented as a known limitation in agents/mlevolve/README.md.
"""
from __future__ import annotations

from pathlib import Path

import pandas as pd

from agents.common.tasks import task_instruction
from graphtestbed._manifest import task_config
from graphtestbed.fetch import cache_dir

def stage(task: str, root: Path) -> Path:
    """Build <root>/<task>/prepared/{public,private}/. Return the prepared dir."""
    cfg = task_config(task)
    s = cfg["submission_schema"]

    src = cache_dir() / task
    if not src.exists():
        raise SystemExit(
            f"No cached dataset at {src}. Run `gtb fetch {task}` first."
        )

    base = root / task / "prepared"
    pub = base / "public"
    priv = base / "private"
    pub.mkdir(parents=True, exist_ok=True)
    priv.mkdir(parents=True, exist_ok=True)
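
    # GraphTestbed ships three pre-split feature tables per task; per the
    # module docstring, only train and val carry labels locally (the test
    # labels live on the scoring server).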
    train = pd.read_csv(src / "train_features.csv")
    val = pd.read_csv(src / "val_features.csv")
    test = pd.read_csv(src / "test_features.csv")
    if s["pred_col"] not in val.columns:
        raise SystemExit(
            f"val_features.csv has no `{s['pred_col']}` column — cannot use "
            f"val as the local-grading split for task {task}."
        )

    # Public tree (what the agent sees). val_no_label = val minus label →
    # served as `test.csv` so the agent's runfile predicts on it.
    val_no_label = val.drop(columns=[s["pred_col"]])
    train.to_csv(pub / "train.csv", index=False)
    val_no_label.to_csv(pub / "test.csv", index=False)
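    # mle-bench-style sample submission: the id column plus a constant
    # placeholder prediction (the 0.5 is an arbitrary stand-in that the
    # agent's runfile is expected to overwrite).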
    sample = val_no_label[[s["id_col"]]].copy()
    sample[s["pred_col"]] = 0.5
    sample.to_csv(pub / "sample_submission.csv", index=False)
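    # The task instruction doubles as the competition description.md the
    # agent reads before writing its runfile.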
    (pub / "description.md").write_text(task_instruction(task))

    # Private tree: val with labels — the local grader checks submission
    # against this.
    val[[s["id_col"], s["pred_col"]]].rename(
        columns={s["pred_col"]: "Label"}
    ).to_csv(priv / "test.csv", index=False)

    # Stash the real test set for post-search re-execution by the user.
    test.to_csv(root / task / "REAL_TEST_FEATURES.csv", index=False)

    # Forward any additional task data files declared in the manifest (graph
    # edges, relation tables, …) into the public tree so the agent can build
    # a real graph model instead of treating the task as pure tabular.
    canonical = {"train_features.csv", "val_features.csv",
                 "test_features.csv", "sample_submission.csv"}
    for spec in cfg["files"].values():
        fn = spec["filename"]
        if fn in canonical:
            continue
        src_path = src / fn
        if src_path.exists():
            (pub / fn).write_bytes(src_path.read_bytes())

    return base
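

# Minimal usage sketch, not part of the original module surface: stage one
# task from the command line. The task name is whatever `gtb fetch` cached;
# both arguments are illustrative.
if __name__ == "__main__":
    import sys

    if len(sys.argv) != 3:
        raise SystemExit(f"usage: {sys.argv[0]} <task> <root>")
    print(stage(sys.argv[1], Path(sys.argv[2])))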