graphtestbed/agents/mlevolve/adapter.py
Zhu Jiajun (jz28583)
arxiv-citation: ship the heterograph (citations + author/category tables)
0309359
"""GraphTestbed task β†’ mle-bench-shaped data tree.
mle-bench expects, per experiment ID:
<root>/<exp_id>/prepared/public/{train.csv,test.csv,description.md,sample_submission.csv}
GraphTestbed's test labels live only on the scoring server, so the agent
cannot be auto-scored against `test_features.csv` locally. v1 strategy:
- Stage `val_features.csv` (with labels) as the "test" the agent
searches against. MLEvolve's grader can score val predictions locally,
which is what drives MCGS exploration.
- Stash the real `test_features.csv` next to the staged tree as
`<root>/<exp_id>/REAL_TEST_FEATURES.csv` so users can re-execute the
best runfile.py against it after the search finishes.
This is documented as a known limitation in agents/mlevolve/README.md.
"""
from __future__ import annotations

import shutil
from pathlib import Path

import pandas as pd

from agents.common.tasks import task_instruction
from graphtestbed._manifest import task_config
from graphtestbed.fetch import cache_dir
def stage(task: str, root: Path) -> Path:
    """Stage a GraphTestbed task as an mle-bench-shaped data tree.

    Builds ``<root>/<task>/prepared/{public,private}`` where:

    - ``public/`` is what the agent sees: ``train.csv``, ``test.csv``
      (val features with the label column removed), ``sample_submission.csv``,
      ``description.md``, plus any extra task data files declared in the
      manifest (graph edges, relation tables, ...).
    - ``private/`` holds val *with* labels as ``test.csv``; the local grader
      scores submissions against it.

    The real ``test_features.csv`` is stashed byte-for-byte at
    ``<root>/<task>/REAL_TEST_FEATURES.csv`` for post-search re-execution.

    Args:
        task: GraphTestbed task ID; its dataset must already be cached
            (run ``gtb fetch <task>`` first).
        root: directory under which the staged tree is created.

    Returns:
        The ``<root>/<task>/prepared`` directory.

    Raises:
        SystemExit: if the dataset is not cached, or ``val_features.csv``
            lacks the prediction column needed for local grading.
    """
    cfg = task_config(task)
    s = cfg["submission_schema"]
    src = cache_dir() / task
    if not src.exists():
        raise SystemExit(
            f"No cached dataset at {src}. Run `gtb fetch {task}` first."
        )
    base = root / task / "prepared"
    pub = base / "public"
    priv = base / "private"
    pub.mkdir(parents=True, exist_ok=True)
    priv.mkdir(parents=True, exist_ok=True)
    train = pd.read_csv(src / "train_features.csv")
    val = pd.read_csv(src / "val_features.csv")
    if s["pred_col"] not in val.columns:
        raise SystemExit(
            f"val_features.csv has no `{s['pred_col']}` column — cannot use "
            f"val as the local-grading split for task {task}."
        )
    # Public tree (what the agent sees). val minus its label column is
    # served as `test.csv` so the agent's runfile predicts on it.
    val_no_label = val.drop(columns=[s["pred_col"]])
    train.to_csv(pub / "train.csv", index=False)
    val_no_label.to_csv(pub / "test.csv", index=False)
    # Sample submission: one row per staged-test example, placeholder score.
    sample = val_no_label[[s["id_col"]]].copy()
    sample[s["pred_col"]] = 0.5
    sample.to_csv(pub / "sample_submission.csv", index=False)
    # Explicit encoding: task instructions may contain non-ASCII text and
    # write_text's default encoding is platform-dependent.
    (pub / "description.md").write_text(task_instruction(task), encoding="utf-8")
    # Private tree: val with labels — the local grader checks submissions
    # against this (label column renamed to "Label" as the grader expects).
    val[[s["id_col"], s["pred_col"]]].rename(
        columns={s["pred_col"]: "Label"}
    ).to_csv(priv / "test.csv", index=False)
    # Stash the real test set byte-for-byte. A pandas read/write round-trip
    # could silently alter the file (float formatting, dtype coercion,
    # quoting), and this is the file users re-execute the best runfile on.
    shutil.copyfile(
        src / "test_features.csv", root / task / "REAL_TEST_FEATURES.csv"
    )
    # Forward any additional task data files declared in the manifest (graph
    # edges, relation tables, ...) into the public tree so the agent can build
    # a real graph model instead of treating the task as pure tabular.
    canonical = {
        "train_features.csv",
        "val_features.csv",
        "test_features.csv",
        "sample_submission.csv",
    }
    for spec in cfg["files"].values():
        fn = spec["filename"]
        if fn in canonical:
            continue
        src_path = src / fn
        if src_path.exists():
            # Streamed copy (preserves mtime) — avoids loading potentially
            # large edge/relation files fully into memory.
            shutil.copy2(src_path, pub / fn)
    return base