# DGX_AI / codeforge / tasks.py
# vasiuuu's picture
# Initial commit for CodeForge GRPO training
# acf77ab
from __future__ import annotations

from dataclasses import dataclass, field
@dataclass(frozen=True)
class Task:
    """Immutable description of a single task.

    Attributes:
        task_id: Identifier of the task.
        task_level: Difficulty label of the task (the tasks in this module
            use ``"easy"``, ``"medium"`` and ``"hard"``).
        brief: Natural-language instructions for the task.
        initial_files: Mapping of file name to the starting content of that
            file.
        target_score: Score threshold attached to the task.
        max_budget: Integer budget attached to the task (units are defined
            by the consumer, not by this module).
        tools: Names of the tools attached to the task.
        hidden_tests: Mapping of test file name to test source. These are
            hidden correctness tests injected by the environment during
            grading. The agent never sees them. They are written into the
            sandbox temp dir alongside the agent's submitted files so pytest
            runs them automatically. This prevents "clean garbage" exploits
            where syntactically valid but semantically wrong code scores
            perfectly. Defaults to an empty dict.
    """

    task_id: str
    task_level: str
    brief: str
    initial_files: dict[str, str]
    target_score: float
    max_budget: int
    tools: tuple[str, ...]
    # A fresh empty dict per instance: the default matches the annotation
    # (so ``.items()`` works on tasks without hidden tests) and no mutable
    # object is shared between instances.
    hidden_tests: dict[str, str] = field(default_factory=dict)
# -- Hidden test suites (agent never sees these) ----------------------------
# Hidden test file for the "easy" task: maps the test file name to its full
# source. The source is assembled line by line; the final empty entry
# produces the trailing newline.
_HIDDEN_EASY = {
    "test_hidden_greet.py": "\n".join(
        (
            "from __future__ import annotations",
            "from main import greet",
            "",
            "def test_greet_alice() -> None:",
            ' assert greet("Alice") == "Hello, Alice!"',
            "",
            "def test_greet_bob() -> None:",
            ' assert greet("Bob") == "Hello, Bob!"',
            "",
            "def test_greet_empty() -> None:",
            ' assert greet("") == "Hello, !"',
            "",
        )
    ),
}
_HIDDEN_MEDIUM = {
"test_hidden_greet.py": (
"from __future__ import annotations\n"
"import pytest\n"
"from main import greet\n\n"
"def test_greet_alice() -> None:\n"
' assert greet("Alice") == "Hello, Alice!"\n\n'
"def test_greet_none_raises() -> None:\n"
" with pytest.raises(ValueError):\n"
" greet(None) # type: ignore[arg-type]\n\n"
"def test_greet_returns_str() -> None:\n"
' assert isinstance(greet("X"), str)\n'
),
}
# Hidden test file for the "hard" task: maps the test file name to its full
# source. These tests import ``greet`` from ``core``. The source is assembled
# line by line; the final empty entry produces the trailing newline.
_HIDDEN_HARD = {
    "test_hidden_core.py": "\n".join(
        (
            "from __future__ import annotations",
            "import pytest",
            "from core import greet",
            "",
            "def test_greet_alice() -> None:",
            ' assert greet("Alice") == "Hello, Alice!"',
            "",
            "def test_greet_bob() -> None:",
            ' assert greet("Bob") == "Hello, Bob!"',
            "",
            "def test_greet_returns_str() -> None:",
            ' assert isinstance(greet("X"), str)',
            "",
            "def test_greet_empty() -> None:",
            ' assert greet("") == "Hello, !"',
            "",
        )
    ),
}
# Registry of all tasks, one per difficulty level, listed from easiest to
# hardest. Each entry pairs the visible starting files with the hidden test
# suite for that level.
TASKS: tuple[Task, ...] = (
    # Easy: implement greet() in a single file.
    Task(
        task_id="greet_single_file",
        task_level="easy",
        target_score=0.90,
        max_budget=4,
        tools=("ruff", "imports", "mypy", "pytest"),
        brief=(
            'Implement `greet(name)` in `main.py` so that `greet("Alice")` '
            'returns `"Hello, Alice!"`. Use type hints. '
            "Keep the module under 15 lines."
        ),
        initial_files={
            "main.py": (
                "def greet(name):\n"
                " pass\n"
            ),
        },
        hidden_tests=_HIDDEN_EASY,
    ),
    # Medium: add None handling to greet() and write a test file.
    Task(
        task_id="greet_with_tests",
        task_level="medium",
        target_score=0.80,
        max_budget=6,
        tools=("ruff", "imports", "mypy", "pytest"),
        brief=(
            "Extend `main.py` so that `greet(None)` raises `ValueError`, and add "
            "a `test_main.py` with pytest assertions. "
            "Keep `ruff` and `mypy --strict` clean."
        ),
        initial_files={
            "main.py": (
                "from __future__ import annotations\n"
                "\n\n"
                "def greet(name: str) -> str:\n"
                ' return f"Hello, {name}!"\n'
            ),
            "test_main.py": "",
        },
        hidden_tests=_HIDDEN_MEDIUM,
    ),
    # Hard: split the code across an entry point, a core module and tests.
    Task(
        task_id="multi_file_module",
        task_level="hard",
        target_score=0.70,
        max_budget=10,
        tools=("ruff", "imports", "mypy", "pytest"),
        brief=(
            "Split into three files: `main.py` (entry), "
            "`core.py` (the greet function), `test_core.py` (tests). "
            "Every function must be type-hinted. All tests pass. "
            "`mypy --strict` clean."
        ),
        initial_files={
            "main.py": (
                "from __future__ import annotations\n\n"
                "from core import greet\n\n\n"
                'if __name__ == "__main__":\n'
                ' print(greet("World"))\n'
            ),
            "core.py": "",
            "test_core.py": "",
        },
        hidden_tests=_HIDDEN_HARD,
    ),
)
def get_task(task_level: str) -> Task:
    """Return the first task in ``TASKS`` whose level equals *task_level*.

    Args:
        task_level: Difficulty label to look up.

    Returns:
        The first matching ``Task``, in ``TASKS`` order.

    Raises:
        ValueError: If no task in ``TASKS`` has the requested level.
    """
    found = next(
        (candidate for candidate in TASKS if candidate.task_level == task_level),
        None,
    )
    if found is None:
        error_text = f"unknown task_level: {task_level!r} (expected easy|medium|hard)"
        raise ValueError(error_text)
    return found