File size: 3,611 Bytes
d094faf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0309359
d094faf
bd3e9ac
 
 
 
 
0309359
d094faf
 
 
 
 
 
 
 
b3dbec5
d094faf
 
 
 
 
 
 
 
 
b3dbec5
 
 
 
 
 
d094faf
0309359
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d094faf
 
 
 
 
 
 
b3dbec5
 
 
d094faf
 
 
0309359
d094faf
 
 
b3dbec5
 
d094faf
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
"""Render a per-task instruction markdown for any agent.

Pulls the canonical task description from datasets/manifest.yaml and decorates
it with the submission contract (id col, pred col, n rows, metric).

Per-task overrides β€” handcrafted prompts that beat the auto-generated text β€”
live in agents/common/tasks_md/<task>.md and take priority when present.
"""

from __future__ import annotations

from pathlib import Path

from graphtestbed._manifest import task_config

_TEMPLATE = """\
# Task: {task}

{description}

## Files you will see

{files_block}

These are pulled from `lanczos/graphtestbed-data` on HuggingFace (subdir
`{task}/`). **Train and HPO on these files only** β€” do not pull from the
upstream source mentioned above to recover test labels. The benchmark is
non-adversarial; we trust agent authors to honor the contract.

The label column is present in train/val and absent from test.

## Submission format

Write a CSV with **exactly two columns**, in this order:

| column | type | meaning |
| --- | --- | --- |
| `{id_col}` | id | matches `test_features.csv[{id_col}]` 100% |
| `{pred_col}` | {pred_type_desc} | {pred_meaning} |

Row count: **{n_rows}**.

## Metric

You will be evaluated on `{primary}` (primary). Secondary: {secondary}.
Optimize for the primary metric.
"""

_DTYPE_DESC = {
    "float": ("float in [0, 1]", "predicted score (probability)"),
    "binary": ("0 or 1", "predicted hard class β€” pick your own threshold "
                          "(e.g. tune on val to maximize the primary metric)"),
}


_KNOWN_FILE_HINTS = {
    "train_features.csv": "labeled training rows",
    "val_features.csv":   "labeled validation rows (use for HPO / early stopping)",
    "test_features.csv":  "**unlabeled** test rows; predict here",
    "sample_submission.csv": "the schema you must match (column order + row IDs)",
}


def _files_block(cfg: dict) -> str:
    """Render every file declared in manifest, with a known hint when we have
    one β€” otherwise just the filename so the agent knows it's available."""
    lines = []
    seen = set()
    # Show the canonical four first in a fixed order, then everything else
    # (graph tables, edges, etc.) in manifest declaration order.
    canonical = ["train_features.csv", "val_features.csv",
                 "test_features.csv", "sample_submission.csv"]
    by_name = {spec["filename"]: key for key, spec in cfg["files"].items()}
    for fn in canonical:
        if fn in by_name:
            lines.append(f"- `{fn}` β€” {_KNOWN_FILE_HINTS[fn]}")
            seen.add(fn)
    for key, spec in cfg["files"].items():
        fn = spec["filename"]
        if fn in seen:
            continue
        hint = _KNOWN_FILE_HINTS.get(fn, "additional task data (see description above)")
        lines.append(f"- `{fn}` β€” {hint}")
    return "\n".join(lines)


def task_instruction(task: str) -> str:
    """Return the instruction markdown for *task*.

    A handcrafted override at tasks_md/<task>.md takes priority; otherwise
    the text is rendered from the manifest entry via _TEMPLATE.
    """
    override_path = Path(__file__).parent / "tasks_md" / f"{task}.md"
    if override_path.exists():
        return override_path.read_text()

    cfg = task_config(task)
    schema = cfg["submission_schema"]
    metric = cfg["metric"]
    # Unknown dtypes fall back to the "float" description.
    dtype_key = schema.get("pred_dtype", "float")
    type_desc, meaning = _DTYPE_DESC.get(dtype_key, _DTYPE_DESC["float"])
    secondary = ", ".join(metric.get("secondary", [])) or "(none)"
    return _TEMPLATE.format(
        task=task,
        description=str(cfg.get("description", "")).strip(),
        files_block=_files_block(cfg),
        id_col=schema["id_col"],
        pred_col=schema["pred_col"],
        n_rows=schema.get("n_rows", "?"),
        pred_type_desc=type_desc,
        pred_meaning=meaning,
        primary=metric["primary"],
        secondary=secondary,
    )