from __future__ import annotations

from dataclasses import dataclass, field
|
|
|
|
@dataclass(frozen=True)
class Task:
    """Immutable specification of one coding task for the agent.

    Bundles the brief shown to the agent, the files it starts from, the
    grading threshold and budget, the tools it may invoke, and the hidden
    correctness tests injected by the environment during grading.  The
    agent never sees ``hidden_tests``: they are written into the sandbox
    temp dir alongside the agent's submitted files so pytest runs them
    automatically.  This prevents "clean garbage" exploits where
    syntactically valid but semantically wrong code scores perfectly.
    """

    task_id: str
    task_level: str  # difficulty tier: "easy" | "medium" | "hard" (see get_task)
    brief: str  # instructions shown to the agent
    initial_files: dict[str, str]  # filename -> starting file contents
    target_score: float  # NOTE(review): presumably the minimum passing score — confirm with the grader
    max_budget: int  # NOTE(review): presumably a tool-call/step budget — confirm with the grader
    tools: tuple[str, ...]  # tool names the agent may run (e.g. "ruff", "pytest")
    # FIX: the default used to be `()` — a *tuple*, contradicting the
    # dict[str, str] annotation and breaking any caller that iterates
    # .items() on a task without hidden tests.  dataclasses rejects a
    # mutable `{}` default, so use default_factory instead.
    hidden_tests: dict[str, str] = field(default_factory=dict)
|
|
|
|
| |
|
|
# Hidden tests for the "easy" tier: pin the exact greet() output, including
# the empty-name edge case.
_HIDDEN_EASY = {
    "test_hidden_greet.py": '''\
from __future__ import annotations
from main import greet

def test_greet_alice() -> None:
    assert greet("Alice") == "Hello, Alice!"

def test_greet_bob() -> None:
    assert greet("Bob") == "Hello, Bob!"

def test_greet_empty() -> None:
    assert greet("") == "Hello, !"
''',
}
|
|
# Hidden tests for the "medium" tier: require greet(None) to raise
# ValueError in addition to the happy path.
_HIDDEN_MEDIUM = {
    "test_hidden_greet.py": '''\
from __future__ import annotations
import pytest
from main import greet

def test_greet_alice() -> None:
    assert greet("Alice") == "Hello, Alice!"

def test_greet_none_raises() -> None:
    with pytest.raises(ValueError):
        greet(None)  # type: ignore[arg-type]

def test_greet_returns_str() -> None:
    assert isinstance(greet("X"), str)
''',
}
|
|
# Hidden tests for the "hard" tier: greet() must be importable from `core`
# (the task requires the function to live in core.py).
# FIX: the embedded source previously did `import pytest` without ever
# using it; that unused import is dead code in the injected file and would
# trip ruff (F401) in the graded sandbox, so it is removed.
_HIDDEN_HARD = {
    "test_hidden_core.py": (
        "from __future__ import annotations\n"
        "from core import greet\n\n"
        "def test_greet_alice() -> None:\n"
        '    assert greet("Alice") == "Hello, Alice!"\n\n'
        "def test_greet_bob() -> None:\n"
        '    assert greet("Bob") == "Hello, Bob!"\n\n'
        "def test_greet_returns_str() -> None:\n"
        '    assert isinstance(greet("X"), str)\n\n'
        "def test_greet_empty() -> None:\n"
        '    assert greet("") == "Hello, !"\n'
    ),
}
|
|
|
|
# Task registry: exactly one task per difficulty tier; ``get_task`` looks
# entries up by ``task_level``.  Note ``target_score`` decreases and
# ``max_budget`` increases with difficulty.
TASKS: tuple[Task, ...] = (
    # Easy tier: implement greet() in a single file from a `pass` stub.
    Task(
        task_id="greet_single_file",
        task_level="easy",
        brief=(
            "Implement `greet(name)` in `main.py` so that `greet(\"Alice\")` returns "
            '`"Hello, Alice!"`. Use type hints. Keep the module under 15 lines.'
        ),
        initial_files={"main.py": "def greet(name):\n    pass\n"},
        target_score=0.90,
        max_budget=4,
        tools=("ruff", "imports", "mypy", "pytest"),
        hidden_tests=_HIDDEN_EASY,
    ),
    # Medium tier: add error handling (greet(None) -> ValueError) plus a
    # visible test file, while staying lint- and type-clean.
    Task(
        task_id="greet_with_tests",
        task_level="medium",
        brief=(
            "Extend `main.py` so that `greet(None)` raises `ValueError`, "
            "and add a `test_main.py` with pytest assertions. Keep `ruff` and "
            "`mypy --strict` clean."
        ),
        initial_files={
            "main.py": (
                "from __future__ import annotations\n\n\n"
                "def greet(name: str) -> str:\n"
                '    return f"Hello, {name}!"\n'
            ),
            "test_main.py": "",  # intentionally empty: the agent must write the tests
        },
        target_score=0.80,
        max_budget=6,
        tools=("ruff", "imports", "mypy", "pytest"),
        hidden_tests=_HIDDEN_MEDIUM,
    ),
    # Hard tier: split into entry / library / tests, fully type-hinted.
    Task(
        task_id="multi_file_module",
        task_level="hard",
        brief=(
            "Split into three files: `main.py` (entry), `core.py` (the greet "
            "function), `test_core.py` (tests). Every function must be type-hinted. "
            "All tests pass. `mypy --strict` clean."
        ),
        initial_files={
            "main.py": (
                "from __future__ import annotations\n\nfrom core import greet\n\n\n"
                'if __name__ == "__main__":\n'
                '    print(greet("World"))\n'
            ),
            "core.py": "",  # agent must create the typed greet() here
            "test_core.py": "",  # agent must write the tests here
        },
        target_score=0.70,
        max_budget=10,
        tools=("ruff", "imports", "mypy", "pytest"),
        hidden_tests=_HIDDEN_HARD,
    ),
)
|
|
|
|
def get_task(task_level: str) -> Task:
    """Return the registered task for a difficulty tier.

    Args:
        task_level: One of ``"easy"``, ``"medium"`` or ``"hard"``.

    Returns:
        The matching :class:`Task` from ``TASKS``.

    Raises:
        ValueError: If no registered task has that level.
    """
    found = next((task for task in TASKS if task.task_level == task_level), None)
    if found is None:
        msg = f"unknown task_level: {task_level!r} (expected easy|medium|hard)"
        raise ValueError(msg)
    return found
|
|