# NOTE(review): the three lines below were Hugging Face web-UI residue
# (uploader avatar caption, commit message, commit hash) pasted into the
# file; as bare text they made the module a SyntaxError. Preserved as
# comments for provenance.
# dmaheshwar22's picture
# deploy: replace template with real demo
# 0dd7c80 verified
"""MBPP+ loader (EvalPlus-augmented MBPP, ~378 sanitized tasks).
MBPP+ applies the same test-augmentation treatment as HumanEval+: the original
MBPP has very thin tests (usually 3 assertions per task); MBPP+ expands each
problem to ~100 tests and filters out problems whose ground truth is
ambiguous or buggy.
Same API as `load_humaneval_plus` — returns a deterministically ordered list
of `Task`.
"""
from __future__ import annotations
from typing import Any, cast
from evalplus.data import get_mbpp_plus # type: ignore[import-untyped]
from .base import Task
def _extract_mbpp_helpers(canonical_solution: str, entry_point: str) -> str:
"""Same helper-extraction logic as mbpp_train (kept independent to
avoid an import cycle between mbpp_plus and mbpp_train)."""
import re
pattern = re.compile(
rf"^def\s+{re.escape(entry_point)}\s*\(",
re.MULTILINE,
)
match = pattern.search(canonical_solution)
if not match:
return ""
raw = canonical_solution[: match.start()].rstrip()
return raw.replace("\r\n", "\n").replace("\r", "\n").expandtabs(4)
def load_mbpp_plus() -> list[Task]:
    """Load every MBPP+ task, sorted deterministically by numeric task id.

    Unlike HumanEval+ (which wraps tests in a `def check(candidate)` under
    the `test` key), MBPP+ stores raw `assert ...` lines under `assertion`,
    referencing the entry point by name; that block is copied verbatim into
    `Task.test`. Any preamble (imports, classes) ahead of the entry-point
    definition in `canonical_solution` is pulled into `Task.helpers` so the
    sandbox executor can prepend it at run time.
    """
    dataset = cast("dict[str, dict[str, Any]]", get_mbpp_plus())
    tasks = [
        Task(
            task_id=tid,
            prompt=record["prompt"],
            canonical_solution=record["canonical_solution"],
            test=record["assertion"],
            entry_point=record["entry_point"],
            helpers=_extract_mbpp_helpers(
                record["canonical_solution"], record["entry_point"]
            ),
        )
        for tid, record in dataset.items()
    ]
    # Task ids look like "Mbpp/<n>"; sort on the numeric suffix so ordering
    # is stable regardless of dict iteration order.
    tasks.sort(key=lambda task: int(task.task_id.rsplit("/", 1)[-1]))
    return tasks