# graphtestbed/agents/common/tasks.py
# Author: Zhu Jiajun (jz28583)
# arxiv-citation: ship the heterograph (citations + author/category tables)
# commit 0309359
"""Render a per-task instruction markdown for any agent.
Pulls the canonical task description from datasets/manifest.yaml and decorates
it with the submission contract (id col, pred col, n rows, metric).
Per-task overrides β€” handcrafted prompts that beat the auto-generated text β€”
live in agents/common/tasks_md/<task>.md and take priority when present.
"""
from __future__ import annotations
from pathlib import Path
from graphtestbed._manifest import task_config
_TEMPLATE = """\
# Task: {task}
{description}
## Files you will see
{files_block}
These are pulled from `lanczos/graphtestbed-data` on HuggingFace (subdir
`{task}/`). **Train and HPO on these files only** β€” do not pull from the
upstream source mentioned above to recover test labels. The benchmark is
non-adversarial; we trust agent authors to honor the contract.
The label column is present in train/val and absent from test.
## Submission format
Write a CSV with **exactly two columns**, in this order:
| column | type | meaning |
| --- | --- | --- |
| `{id_col}` | id | matches `test_features.csv[{id_col}]` 100% |
| `{pred_col}` | {pred_type_desc} | {pred_meaning} |
Row count: **{n_rows}**.
## Metric
You will be evaluated on `{primary}` (primary). Secondary: {secondary}.
Optimize for the primary metric.
"""
# Maps submission_schema.pred_dtype -> (human-readable type, meaning) pair
# used to fill the submission-format table in _TEMPLATE. Mojibake em-dash
# ("β€”") in the agent-visible "binary" description repaired to "—".
_DTYPE_DESC = {
    "float": ("float in [0, 1]", "predicted score (probability)"),
    "binary": ("0 or 1", "predicted hard class — pick your own threshold "
                         "(e.g. tune on val to maximize the primary metric)"),
}
# One-line descriptions for the canonical CSVs every task ships.
# _files_block() falls back to a generic hint for any other declared file.
_KNOWN_FILE_HINTS = {
    "train_features.csv": "labeled training rows",
    "val_features.csv": "labeled validation rows (use for HPO / early stopping)",
    "test_features.csv": "**unlabeled** test rows; predict here",
    "sample_submission.csv": "the schema you must match (column order + row IDs)",
}
def _files_block(cfg: dict) -> str:
    """Render a markdown bullet list of every file declared in the manifest.

    Files we recognize get their hint from _KNOWN_FILE_HINTS; anything else
    (graph tables, edges, etc.) is listed by name so the agent knows it is
    available.

    Fixes vs. previous revision: mojibake "β€”" in the emitted bullets repaired
    to "—"; the filename index is a set (the dict values were never read);
    duplicate filenames appearing under several manifest keys are listed once.
    """
    # Show the canonical four first in a fixed order, then everything else
    # in manifest declaration order.
    canonical = [
        "train_features.csv",
        "val_features.csv",
        "test_features.csv",
        "sample_submission.csv",
    ]
    declared = {spec["filename"] for spec in cfg["files"].values()}
    bullets: list[str] = []
    seen: set[str] = set()
    for fn in canonical:
        if fn in declared:
            bullets.append(f"- `{fn}` — {_KNOWN_FILE_HINTS[fn]}")
            seen.add(fn)
    for spec in cfg["files"].values():
        fn = spec["filename"]
        if fn in seen:
            continue
        seen.add(fn)  # guard against the same filename under two manifest keys
        hint = _KNOWN_FILE_HINTS.get(fn, "additional task data (see description above)")
        bullets.append(f"- `{fn}` — {hint}")
    return "\n".join(bullets)
def task_instruction(task: str) -> str:
    """Return the instruction markdown for *task*.

    A handcrafted override in agents/common/tasks_md/<task>.md wins when it
    exists; otherwise the text is rendered from the manifest entry via
    _TEMPLATE.
    """
    override = Path(__file__).parent / "tasks_md" / f"{task}.md"
    if override.exists():
        # Explicit encoding: the locale default (e.g. cp1252 on Windows)
        # would garble any non-ASCII characters in a handcrafted prompt.
        return override.read_text(encoding="utf-8")
    cfg = task_config(task)
    schema = cfg["submission_schema"]
    metric = cfg["metric"]
    # Unknown pred_dtype falls back to the float/probability description.
    type_desc, meaning = _DTYPE_DESC.get(
        schema.get("pred_dtype", "float"), _DTYPE_DESC["float"]
    )
    return _TEMPLATE.format(
        task=task,
        description=str(cfg.get("description", "")).strip(),
        files_block=_files_block(cfg),
        id_col=schema["id_col"],
        pred_col=schema["pred_col"],
        n_rows=schema.get("n_rows", "?"),
        pred_type_desc=type_desc,
        pred_meaning=meaning,
        primary=metric["primary"],
        secondary=", ".join(metric.get("secondary", [])) or "(none)",
    )