Spaces:

dmaheshwar22
/

verifiable-rl-coder

Running

deploy: replace template with real demo

0dd7c80 verified 20 days ago

1.08 kB

	"""Shared Agent protocol.

	Every agent that proposes candidate solutions — the base proposer, SFT-tuned
	model, GRPO-tuned model — implements this interface. Downstream code
	(benchmarks, reward evaluation, training rollouts) depends only on the
	protocol, never on a concrete backend.
	"""

	from __future__ import annotations

	from typing import Protocol


	class Agent(Protocol):
	    """Generates candidate solutions for natural-language / signature-style prompts.

	    This is a structural (duck-typed) interface: a backend satisfies it by
	    providing `generate` and `close` with compatible signatures; it does not
	    need to inherit from this class.
	    """

	    def generate(
	        self,
	        prompts: list[str],
	        *,
	        n: int = 1,
	        temperature: float = 0.2,
	        top_p: float = 0.95,
	        max_tokens: int = 1024,
	    ) -> list[list[str]]:
	        """For each prompt, return `n` candidate completions.

	        Args:
	            prompts: Prompts to generate candidate solutions for.
	            n: Number of candidates to return per prompt.
	            temperature: Keyword-only sampling setting; presumably the
	                sampling temperature -- interpretation is left to the
	                concrete backend.
	            top_p: Keyword-only sampling setting; presumably the nucleus
	                sampling cutoff -- interpretation is left to the concrete
	                backend.
	            max_tokens: Keyword-only setting; presumably the per-candidate
	                generation length limit -- confirm against the backend.

	        Returns:
	            A list of length `len(prompts)`; each element is a list of
	            length `n` containing the post-extracted Python source of each
	            candidate solution (no markdown fences, no commentary).
	        """
	        ...

	    def close(self) -> None:
	        """Release any model / GPU resources.

	        Implementations are expected to be idempotent: calling `close`
	        more than once must be safe.
	        """
	        ...