Spaces:
Sleeping
Sleeping
File size: 1,488 Bytes
7952f32 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 | """Training: multi-turn rollout for GRPO / SFT.
Public surface:
EnvClient — protocol; HttpEnvClient or InProcessEnvClient
Policy — protocol; ScriptedPolicy or HfPolicy
rollout(...) — drive one episode, return Trajectory
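Trajectory, TurnSample — per-turn (prompt, completion, reward, return)
parse_completion, render_action, ParseSuccess, ParseFailure — re-exported from graphforge.training.protocol
trajectory_summary — re-exported from graphforge.training.rollout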
The rollout is environment-agnostic and policy-agnostic — see
PROPOSAL.md §7.2 for the GRPOTrainer integration story.
"""
from graphforge.training.client import (
EnvClient,
HttpEnvClient,
InProcessEnvClient,
)
from graphforge.training.policy import HfPolicy, Policy, ScriptedPolicy
from graphforge.training.protocol import (
ParseFailure,
ParseSuccess,
parse_completion,
render_action,
)
from graphforge.training.rollout import (
Trajectory,
TurnSample,
rollout,
trajectory_summary,
)
# Public re-exports: every name listed here is imported above from the
# client, policy, protocol, or rollout submodule. The train_grpo / train_sft
# stubs defined below are not listed, so a star-import of this package does
# not pick them up.
__all__ = [
"EnvClient",
"HfPolicy",
"HttpEnvClient",
"InProcessEnvClient",
"ParseFailure",
"ParseSuccess",
"Policy",
"ScriptedPolicy",
"Trajectory",
"TurnSample",
"parse_completion",
"render_action",
"rollout",
"trajectory_summary",
]
def train_grpo(config: object) -> None:  # pragma: no cover — TODO
    """Placeholder entry point for GRPO training.

    Not implemented yet: every call raises. ``config`` is accepted but
    is not read by the current body.

    Raises:
        NotImplementedError: Always; the message points to PROPOSAL.md §7.
    """
    message = "GRPO training TODO — see PROPOSAL.md §7"
    raise NotImplementedError(message)
def train_sft(config: object) -> None:  # pragma: no cover — TODO
    """Placeholder entry point for the SFT fallback ("plan B") training path.

    Not implemented yet: every call raises. ``config`` is accepted but
    is not read by the current body.

    Raises:
        NotImplementedError: Always; the message points to PROPOSAL.md §7.4.
    """
    message = "SFT plan B TODO — see PROPOSAL.md §7.4"
    raise NotImplementedError(message)
|