"""Render a per-task instruction markdown for any agent.
Pulls the canonical task description from datasets/manifest.yaml and decorates
it with the submission contract (id col, pred col, n rows, metric).

Per-task overrides (handcrafted prompts that beat the auto-generated text)
live in agents/common/tasks_md/<task>.md and take priority when present.
"""
from __future__ import annotations

from pathlib import Path

from graphtestbed._manifest import task_config
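
# A manifest entry consumed here is expected to look roughly like the sketch
# below (illustrative only: the task id and values are made up, but the field
# names mirror the accesses in _files_block and task_instruction):
#
#   link-pred:
#     description: "Predict whether a held-out edge exists."
#     files:
#       train: {filename: train_features.csv}
#       val: {filename: val_features.csv}
#       test: {filename: test_features.csv}
#       sample: {filename: sample_submission.csv}
#       edges: {filename: edges.csv}
#     submission_schema:
#       id_col: pair_id
#       pred_col: score
#       pred_dtype: float
#       n_rows: 1000
#     metric:
#       primary: roc_auc
#       secondary: [average_precision]
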
_TEMPLATE = """\
# Task: {task}

{description}

## Files you will see

{files_block}

These are pulled from `lanczos/graphtestbed-data` on HuggingFace (subdir
`{task}/`). **Train and HPO on these files only**; do not pull from the
upstream source mentioned above to recover test labels. The benchmark is
non-adversarial; we trust agent authors to honor the contract.

The label column is present in train/val and absent from test.

## Submission format

Write a CSV with **exactly two columns**, in this order:
| column | type | meaning |
| --- | --- | --- |
| `{id_col}` | id | must match `test_features.csv[{id_col}]` exactly |
| `{pred_col}` | {pred_type_desc} | {pred_meaning} |

Row count: **{n_rows}**.

## Metric

You will be evaluated on `{primary}` (primary). Secondary: {secondary}.
Optimize for the primary metric.
"""

_DTYPE_DESC = {
    "float": ("float in [0, 1]", "predicted score (probability)"),
    "binary": ("0 or 1", "predicted hard class; pick your own threshold "
               "(e.g. tune on val to maximize the primary metric)"),
}

_KNOWN_FILE_HINTS = {
    "train_features.csv": "labeled training rows",
    "val_features.csv": "labeled validation rows (use for HPO / early stopping)",
    "test_features.csv": "**unlabeled** test rows; predict here",
    "sample_submission.csv": "the schema you must match (column order + row IDs)",
}


def _files_block(cfg: dict) -> str:
    """Render every file declared in the manifest, with a known hint when we
    have one; otherwise just the filename so the agent knows it's available."""
    lines = []
    seen = set()
    # Show the canonical four first in a fixed order, then everything else
    # (graph tables, edges, etc.) in manifest declaration order.
    canonical = ["train_features.csv", "val_features.csv",
                 "test_features.csv", "sample_submission.csv"]
    declared = {spec["filename"] for spec in cfg["files"].values()}
    for fn in canonical:
        if fn in declared:
            lines.append(f"- `{fn}`: {_KNOWN_FILE_HINTS[fn]}")
            seen.add(fn)
    for spec in cfg["files"].values():
        fn = spec["filename"]
        if fn in seen:
            continue
        hint = _KNOWN_FILE_HINTS.get(fn, "additional task data (see description above)")
        lines.append(f"- `{fn}`: {hint}")
    return "\n".join(lines)
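
# Illustrative output of _files_block for a task declaring the canonical four
# files plus an extra edges.csv (hypothetical task; hints are taken from
# _KNOWN_FILE_HINTS above):
#
#   - `train_features.csv`: labeled training rows
#   - `val_features.csv`: labeled validation rows (use for HPO / early stopping)
#   - `test_features.csv`: **unlabeled** test rows; predict here
#   - `sample_submission.csv`: the schema you must match (column order + row IDs)
#   - `edges.csv`: additional task data (see description above)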


def task_instruction(task: str) -> str:
    """Return instruction markdown for `task`; a handcrafted override in
    tasks_md/ takes priority over the auto-generated template."""
    override = Path(__file__).parent / "tasks_md" / f"{task}.md"
    if override.exists():
        return override.read_text()
    cfg = task_config(task)
    s = cfg["submission_schema"]
    m = cfg["metric"]
    type_desc, meaning = _DTYPE_DESC.get(
        s.get("pred_dtype", "float"), _DTYPE_DESC["float"]
    )
    return _TEMPLATE.format(
        task=task,
        description=str(cfg.get("description", "")).strip(),
        files_block=_files_block(cfg),
        id_col=s["id_col"],
        pred_col=s["pred_col"],
        n_rows=s.get("n_rows", "?"),
        pred_type_desc=type_desc,
        pred_meaning=meaning,
        primary=m["primary"],
        secondary=", ".join(m.get("secondary", [])) or "(none)",
    )
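

if __name__ == "__main__":
    # Minimal smoke test, not part of the public API: render one task's
    # instructions to stdout. Assumes graphtestbed's manifest is importable
    # and that the task id passed on the command line is declared in it.
    import sys

    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} <task>")
    print(task_instruction(sys.argv[1]))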